In [42]:
import os
import pandas as pd
import openslide
from PIL import ImageDraw

In [16]:
root = "/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu"

# load slide
f = openslide.OpenSlide(os.path.join(root, 'HMNT1747_bis - 2017-07-07 19.50.24.ndpi'))
properties=f.properties

# read annotations from csv
df = pd.read_csv(os.path.join(root, 'HMNT1747_bis_anno_df.csv'), index_col=0)
print(df.shape)
display(df.head(5))

(698, 5)


Unnamed: 0,id,type,shape,x,y
0,0,T,freehand,22245196,-10921624
1,0,T,freehand,21983674,-10921624
2,0,T,freehand,21591380,-10921624
3,0,T,freehand,21094476,-10921624
4,0,T,freehand,20545268,-10921624


### load annotations (csv)

In [28]:
# properties to translate coordinates of annotations
offsetx = int(properties['hamamatsu.XOffsetFromSlideCentre'])
print("offset_x = ", offsetx)
offsety = int(properties['hamamatsu.YOffsetFromSlideCentre'])
print("offset_y = ", offsety)
mppx = float(properties[openslide.PROPERTY_NAME_MPP_X])
print("mpp_x = ", mppx)
mppy = float(properties[openslide.PROPERTY_NAME_MPP_Y])
print("mpp_y = ", mppy)


# return the level for downsampling. Here : 40x/20=2
mag=float(properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
downsample=mag/20

patch_size = 512

(width, height)=f.dimensions
print("Width= "+str(width)+", Height= "+str(height))
w = width / downsample
h = height / downsample

offset_x =  14114285
offset_y =  -190072
mpp_x =  0.2288329519450801
mpp_y =  0.2288329519450801
Width= 123008, Height= 101376


In [29]:
# translate annotations
list_point = []
types = []
shapes = []
anno = 0 # counter
list_point.append([])

for i in range(len(df)):
    if (df['id'][i] != anno):
        list_point.append([])
        types.append(df['type'][i-1])
        shapes.append(df['shape'][i-1])
        anno = anno + 1
    xcor = df['x'][i]
    xcor = xcor - offsetx
    xcor = xcor /1000 / mppx / downsample / patch_size + w/2
    ycor = df['y'][i]
    ycor = ycor - offsety
    ycor = ycor /1000 / mppy / downsample / patch_size + h/2
    list_point[anno].append((xcor, ycor))

types.append(df['type'][len(df)-1])
shapes.append(df['shape'][len(df)-1])

In [35]:
for type in types:
    if type != 'T' and type != 'NT':
        raise ValueError('Error in annotation type.')

In [None]:
if all(type == 'T' for type in types) or all(type == 'NT' for type in types):
    raise ValueError('Annotation error: only have 1 class.')

In [None]:
mask_anno = Image.new('L', (w, h))
draw = ImageDraw.Draw(mask_anno)
for i in range((anno+1)):
    if shapes[i] == 'freehand':
        if types[i] == 'T':
            draw.polygon(list_point[i], outline=255, fill=255)
        elif types[i] == 'NT':
            draw.polygon(list_point[i], outline=100, fill=100)
        else:
            raise ValueError('Annotation error: unknown class.')
    else:
        raise ValueError('Annotation error: not polygon.')
#mask_anno.save(out_path + '_grayscale_annotation_.png')

# resize the annotation to match the prob map
crop = mask_anno.crop((0, 0, heatmap.shape[1]*times, heatmap.shape[0]*times))
# default "resample" is set to Image.NEAREST. 
# NEAREST is the best here (downsampling) on all overall indicators with fine annotation. 
# The AUC and accuracy could be very different. Also depends on your PIL version.
annotations_im = crop.resize((heatmap.shape[1], heatmap.shape[0]))
annotations_im.save(out_path + '_annotation.png')
annotations = np.asanyarray(annotations_im)

In [78]:
import math
import numpy as np

In [149]:
slide_name = "TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083.h5"

In [133]:
with openslide.OpenSlide(os.path.join("/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/data/data_tcga_hcc", 
                                      slide_name[:-3]+".svs")) as f:
    print(np.asarray(f.dimensions))

    if (f.properties['aperio.AppMag'] == '20'): # 20x  ~= 0.5 or 1.0 (not correctly recognized)
        downsample = 1.0
    elif (f.properties['aperio.AppMag'] == '40'): # 40x pixelsize ~= 0.25
        downsample = 2.0
    else:
        raise Exception("The highest magnification should be 20x or 40x.")

    print(np.asarray([np.int64(math.floor(f.dimensions[0]/downsample)), np.int64(math.floor(f.dimensions[1]/downsample))]))

    print(np.asarray([np.float64(1), np.float64(1)]))

    print(slide_name[:-3])

[107249  89559]
[53624 44779]
[1. 1.]
TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083


In [150]:
import h5py
filename = os.path.join("/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/results/patches", 
                        slide_name)

with h5py.File(filename, "r") as f_ref:
    for k in range(len(f_ref["imgs"].attrs.values())): # coords have no attributes
        print(list(f_ref["imgs"].attrs.keys())[k])
        print(list(f_ref["imgs"].attrs.values())[k])

downsample
[1. 1.]
downsampled_level_dim
[53624 44779]
level_dim
[107249  89559]
patch_level
0
wsi_name
TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083


In [146]:
slide_name = "HMNT0116_bis - 2017-06-11 13.48.07.h5"

In [143]:
with openslide.OpenSlide(os.path.join('/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu', slide_name[:-3]+'.ndpi')) as f:

    print(np.asarray(f.dimensions))
    
    if (f.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '20'): # 20x  ~= 0.5 or 1.0 (not correctly recognized)
        downsample = 1.0
    elif (f.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '40'): # 40x pixelsize ~= 0.25
        downsample = 2.0
    else:
        raise Exception("The highest magnification should be 20x or 40x.")

    print(np.asarray([np.int64(math.floor(f.dimensions[0]/downsample)), np.int64(math.floor(f.dimensions[1]/downsample))]))

    print(np.asarray([np.float64(1), np.float64(1)]))

    print(slide_name[:-3])

[99200 96768]
[49600 48384]
[1. 1.]
HMNT0116_bis - 2017-06-11 13.48.07


In [148]:
filename = os.path.join("/media/visiopharm5/WDRed(backup)/clam_extension/results/patches_mondor_tumor", 
                        slide_name)

with h5py.File(filename, "r") as f_ref:
    for k in range(len(f_ref["imgs"].attrs.values())): # coords have no attributes
        print(list(f_ref["imgs"].attrs.keys())[k])
        print(list(f_ref["imgs"].attrs.values())[k])

downsample
[1. 1.]
downsampled_level_dim
[49600 48384]
level_dim
[99200 96768]
patch_level
0
wsi_name
HMNT0116_bis - 2017-06-11 13.48.07


In [40]:
f.keys()

TypeError: Not a location id (invalid object ID)

In [59]:
import numpy as np
import math

path_slide = "/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu"

list_slide = []

for root, dirs, files in os.walk(path_slide):
    for file in files[:20]:
        if file.endswith("ndpi"):
            print(os.path.join(path_slide, file))
            with openslide.OpenSlide(os.path.join(path_slide, file)) as f_ref:
                  # f_ref.properties['aperio.AppMag']
                    if (f_ref.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '20'): # 20x  ~= 0.5 or 1.0 (not correctly recognized)
                          downsample = 1.0
                    elif (f_ref.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '40'): # 40x pixelsize ~= 0.25
                          downsample = 2.0
                    else:
                          raise Exception("The highest magnification should be 20x or 40x.")

                    list_slide.append(pd.DataFrame.from_dict({"downsampled_level_dim":set((math.floor(f_ref.dimensions[0]/downsample), math.floor(f_ref.dimensions[1]/downsample))),
                    "level_dim":set(f_ref.dimensions), "wsi_name":file[:-5]}, dtype=object).iloc[0,:])
                
df = pd.concat(list_slide, ignore_index=True)
print(df.shape)
display(df.head(5))

df.to_csv(os.path.join(path_slide, "slide_info.csv"))

/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0001.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0001_bis - 2017-05-31 15.03.09.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0003 - 2017-05-29 15.41.33.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0003_bis.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0004 - 2017-07-08 21.07.19.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0023 - 2017-08-19 00.15.23.ndpi
/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0023_bis - 2017-08-19 01.13.00.ndpi
(21,)


0    {59520, 41216}
1    {59520, 41216}
2          HMNT0001
3    {53760, 30464}
4    {53760, 30464}
Name: 0, dtype: object

In [35]:
with openslide.OpenSlide("/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu/HMNT0279 - 2017-07-15 19.24.50.ndpi") as f_ref:
    pass

OpenSlideUnsupportedFormatError: Unsupported or missing image file

In [62]:
list_anno = []
for roots, dirs, files in os.walk("/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu"):
    for file in files:
        if file.endswith(".qpdata"):
            list_anno.append(file)
display(list_anno)
print(len(list_anno))

['HMNT0472_P672772_03 - 2019-11-05 01.18.44.qpdata',
 'HMNT0516_P622582-05-HES.qpdata',
 'HMNT1781_P705474_01_HES - 2018-11-01 10.08.24.qpdata',
 'HMNT1885_P703089_B02_HES - 2018-11-01 09.22.15.qpdata',
 'HMNT2066_P734242_01_HES - 2018-11-01 16.13.44.qpdata',
 'HMNT2321_757692-05-HES.qpdata',
 'HMNT2326_P758321-B04-HES.qpdata',
 'HMNT2386_P761529-C01-HES.qpdata',
 'HMNT2387_P761471-A03-HES.qpdata',
 'HMNT2409_P764429_B01_HE - 2020-07-23 16.26.41.qpdata',
 'HMNT2410_P764429_C06_HE - 2020-07-23 16.13.36.qpdata']

11


In [70]:
from shutil import copyfile
import json
for root, dirs, files in os.walk("/media/visiopharm5/WDRed(backup)/qupath_mondor/data", topdown=False):
    for file in files:
        if file == "server.json":
            with open(os.path.join(root, file)) as f:
                server = json.load(f)
                if server["metadata"]["name"].split("/")[-1].replace(".ndpi", ".qpdata") in list_anno:
                    print(server["metadata"]["name"])
                    copyfile(os.path.join("/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu", server["metadata"]["name"].split("/")[-1].replace(".ndpi", ".qpdata")), os.path.join(root, "data.qpdata"))

HMNT2410_P764429_C06_HE - 2020-07-23 16.13.36.ndpi
HMNT0472_P672772_03 - 2019-11-05 01.18.44.ndpi
HMNT2409_P764429_B01_HE - 2020-07-23 16.26.41.ndpi
HMNT1781_P705474_01_HES - 2018-11-01 10.08.24.ndpi
HMNT1885_P703089_B02_HES - 2018-11-01 09.22.15.ndpi
HMNT2066_P734242_01_HES - 2018-11-01 16.13.44.ndpi
