In [42]:
import os
import pandas as pd
import openslide
from PIL import ImageDraw

In [16]:
root = "/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu"

# load slide
f = openslide.OpenSlide(os.path.join(root, 'HMNT1747_bis - 2017-07-07 19.50.24.ndpi'))
properties=f.properties

# read annotations from csv
df = pd.read_csv(os.path.join(root, 'HMNT1747_bis_anno_df.csv'), index_col=0)
print(df.shape)
display(df.head(5))

(698, 5)


Unnamed: 0,id,type,shape,x,y
0,0,T,freehand,22245196,-10921624
1,0,T,freehand,21983674,-10921624
2,0,T,freehand,21591380,-10921624
3,0,T,freehand,21094476,-10921624
4,0,T,freehand,20545268,-10921624


### load annotations (csv)

In [28]:
# properties to translate coordinates of annotations
offsetx = int(properties['hamamatsu.XOffsetFromSlideCentre'])
print("offset_x = ", offsetx)
offsety = int(properties['hamamatsu.YOffsetFromSlideCentre'])
print("offset_y = ", offsety)
mppx = float(properties[openslide.PROPERTY_NAME_MPP_X])
print("mpp_x = ", mppx)
mppy = float(properties[openslide.PROPERTY_NAME_MPP_Y])
print("mpp_y = ", mppy)


# return the level for downsampling. Here : 40x/20=2
mag=float(properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
downsample=mag/20

patch_size = 512

(width, height)=f.dimensions
print("Width= "+str(width)+", Height= "+str(height))
w = width / downsample
h = height / downsample

offset_x =  14114285
offset_y =  -190072
mpp_x =  0.2288329519450801
mpp_y =  0.2288329519450801
Width= 123008, Height= 101376


In [29]:
# translate annotations
list_point = []
types = []
shapes = []
anno = 0 # counter
list_point.append([])

for i in range(len(df)):
    if (df['id'][i] != anno):
        list_point.append([])
        types.append(df['type'][i-1])
        shapes.append(df['shape'][i-1])
        anno = anno + 1
    xcor = df['x'][i]
    xcor = xcor - offsetx
    xcor = xcor /1000 / mppx / downsample / patch_size + w/2
    ycor = df['y'][i]
    ycor = ycor - offsety
    ycor = ycor /1000 / mppy / downsample / patch_size + h/2
    list_point[anno].append((xcor, ycor))

types.append(df['type'][len(df)-1])
shapes.append(df['shape'][len(df)-1])

In [35]:
for type in types:
    if type != 'T' and type != 'NT':
        raise ValueError('Error in annotation type.')

In [None]:
if all(type == 'T' for type in types) or all(type == 'NT' for type in types):
    raise ValueError('Annotation error: only have 1 class.')

In [None]:
mask_anno = Image.new('L', (w, h))
draw = ImageDraw.Draw(mask_anno)
for i in range((anno+1)):
    if shapes[i] == 'freehand':
        if types[i] == 'T':
            draw.polygon(list_point[i], outline=255, fill=255)
        elif types[i] == 'NT':
            draw.polygon(list_point[i], outline=100, fill=100)
        else:
            raise ValueError('Annotation error: unknown class.')
    else:
        raise ValueError('Annotation error: not polygon.')
#mask_anno.save(out_path + '_grayscale_annotation_.png')

# resize the annotation to match the prob map
crop = mask_anno.crop((0, 0, heatmap.shape[1]*times, heatmap.shape[0]*times))
# default "resample" is set to Image.NEAREST. 
# NEAREST is the best here (downsampling) on all overall indicators with fine annotation. 
# The AUC and accuracy could be very different. Also depends on your PIL version.
annotations_im = crop.resize((heatmap.shape[1], heatmap.shape[0]))
annotations_im.save(out_path + '_annotation.png')
annotations = np.asanyarray(annotations_im)

In [78]:
import math
import numpy as np

In [149]:
slide_name = "TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083.h5"

In [133]:
with openslide.OpenSlide(os.path.join("/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/data/data_tcga_hcc", 
                                      slide_name[:-3]+".svs")) as f:
    print(np.asarray(f.dimensions))

    if (f.properties['aperio.AppMag'] == '20'): # 20x  ~= 0.5 or 1.0 (not correctly recognized)
        downsample = 1.0
    elif (f.properties['aperio.AppMag'] == '40'): # 40x pixelsize ~= 0.25
        downsample = 2.0
    else:
        raise Exception("The highest magnification should be 20x or 40x.")

    print(np.asarray([np.int64(math.floor(f.dimensions[0]/downsample)), np.int64(math.floor(f.dimensions[1]/downsample))]))

    print(np.asarray([np.float64(1), np.float64(1)]))

    print(slide_name[:-3])

[107249  89559]
[53624 44779]
[1. 1.]
TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083


In [150]:
import h5py
filename = os.path.join("/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/results/patches", 
                        slide_name)

with h5py.File(filename, "r") as f_ref:
    for k in range(len(f_ref["imgs"].attrs.values())): # coords have no attributes
        print(list(f_ref["imgs"].attrs.keys())[k])
        print(list(f_ref["imgs"].attrs.values())[k])

downsample
[1. 1.]
downsampled_level_dim
[53624 44779]
level_dim
[107249  89559]
patch_level
0
wsi_name
TCGA-K7-A6G5-01Z-00-DX1.6681DB17-D11D-40DE-829C-2DB425BD3083


In [146]:
slide_name = "HMNT0116_bis - 2017-06-11 13.48.07.h5"

In [143]:
with openslide.OpenSlide(os.path.join('/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu', slide_name[:-3]+'.ndpi')) as f:

    print(np.asarray(f.dimensions))
    
    if (f.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '20'): # 20x  ~= 0.5 or 1.0 (not correctly recognized)
        downsample = 1.0
    elif (f.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER] == '40'): # 40x pixelsize ~= 0.25
        downsample = 2.0
    else:
        raise Exception("The highest magnification should be 20x or 40x.")

    print(np.asarray([np.int64(math.floor(f.dimensions[0]/downsample)), np.int64(math.floor(f.dimensions[1]/downsample))]))

    print(np.asarray([np.float64(1), np.float64(1)]))

    print(slide_name[:-3])

[99200 96768]
[49600 48384]
[1. 1.]
HMNT0116_bis - 2017-06-11 13.48.07


In [148]:
filename = os.path.join("/media/visiopharm5/WDRed(backup)/clam_extension/results/patches_mondor_tumor", 
                        slide_name)

with h5py.File(filename, "r") as f_ref:
    for k in range(len(f_ref["imgs"].attrs.values())): # coords have no attributes
        print(list(f_ref["imgs"].attrs.keys())[k])
        print(list(f_ref["imgs"].attrs.values())[k])

downsample
[1. 1.]
downsampled_level_dim
[49600 48384]
level_dim
[99200 96768]
patch_level
0
wsi_name
HMNT0116_bis - 2017-06-11 13.48.07


In [40]:
for root, dirs,files in os.walk()

TypeError: Not a location id (invalid object ID)

In [62]:
list_anno = []
for roots, dirs, files in os.walk("/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu"):
    for file in files:
        if file.endswith(".qpdata"):
            list_anno.append(file)
display(list_anno)
print(len(list_anno))

['HMNT0472_P672772_03 - 2019-11-05 01.18.44.qpdata',
 'HMNT0516_P622582-05-HES.qpdata',
 'HMNT1781_P705474_01_HES - 2018-11-01 10.08.24.qpdata',
 'HMNT1885_P703089_B02_HES - 2018-11-01 09.22.15.qpdata',
 'HMNT2066_P734242_01_HES - 2018-11-01 16.13.44.qpdata',
 'HMNT2321_757692-05-HES.qpdata',
 'HMNT2326_P758321-B04-HES.qpdata',
 'HMNT2386_P761529-C01-HES.qpdata',
 'HMNT2387_P761471-A03-HES.qpdata',
 'HMNT2409_P764429_B01_HE - 2020-07-23 16.26.41.qpdata',
 'HMNT2410_P764429_C06_HE - 2020-07-23 16.13.36.qpdata']

11


In [70]:
from shutil import copyfile
import json
for root, dirs, files in os.walk("/media/visiopharm5/WDRed(backup)/qupath_mondor/data", topdown=False):
    for file in files:
        if file == "server.json":
            with open(os.path.join(root, file)) as f:
                server = json.load(f)
                if server["metadata"]["name"].split("/")[-1].replace(".ndpi", ".qpdata") in list_anno:
                    print(server["metadata"]["name"])
                    copyfile(os.path.join("/media/visiopharm5/WDGold/deeplearning/Hepatocarcinomes/slides_annotations_hammamatsu", server["metadata"]["name"].split("/")[-1].replace(".ndpi", ".qpdata")), os.path.join(root, "data.qpdata"))

HMNT2410_P764429_C06_HE - 2020-07-23 16.13.36.ndpi
HMNT0472_P672772_03 - 2019-11-05 01.18.44.ndpi
HMNT2409_P764429_B01_HE - 2020-07-23 16.26.41.ndpi
HMNT1781_P705474_01_HES - 2018-11-01 10.08.24.ndpi
HMNT1885_P703089_B02_HES - 2018-11-01 09.22.15.ndpi
HMNT2066_P734242_01_HES - 2018-11-01 16.13.44.ndpi


In [236]:
list_name_wsi = []
import json
import random
for root, dirs, files in os.walk("/media/visiopharm5/WDRed(backup)/qupath_mondor/data", topdown=False):
    if root == "/media/visiopharm5/WDRed(backup)/qupath_mondor/data":
        print(len(dirs))
    for file in files:
        if file == "server.json":
            with open(os.path.join(root, file)) as f:
                server = json.load(f)
                list_name_wsi.append(server["metadata"]["name"].split("/")[-1])
set_name_wsi = set(list_name_wsi) # sometimes the folders in data will be more than the amount of acutal images in the project, if you have ever deleted items
print(len(set_name_wsi)) 
print(random.sample(set_name_wsi, 5))

266
261
['HMNT0981 - 2017-06-30 09.41.17.ndpi', 'HMNT0280 - 2017-06-16 01.16.02.ndpi', 'HMNT0576 - 2017-06-05 00.39.43.ndpi', 'HMNT0129_bis - 2017-06-17 17.15.39.ndpi', 'HMNT2387_P761471-A03-HES.svs']


In [243]:
list_name_wsi = []
for name in set_name_wsi:
    list_name_wsi.append(name.replace(".ndpi", ".svs").replace(".svs", ""))
print(len(list_name_wsi))
print(list_name_wsi[:5])

261
['HMNT1029_bis - 2017-07-07 20.46.39', 'HMNT0573_bis - 2017-06-03 02.10.00', 'HMNT0488 - 2017-06-19 06.25.29', 'HMNT0750 - 2017-06-08 19.38.54', 'HMNT0393_bis - 2017-06-17 02.22.36']


In [244]:
set(list_name_wsi) - set(dir1)

{'HMNT0073 - 2017-06-17 21.44.27',
 'HMNT0505_bis - 2017-06-09 20.24.57',
 'HMNT0578 - 2017-06-11 05.30.04',
 'HMNT0885 - 2017-06-16 23.34.59'}

In [237]:
import os
path1 = '/media/visiopharm5/WDRed(backup)/qupath_mondor/copying'

dir1 = []
lenfile1 = []
for root, dirs, files in os.walk(path1):
    if root != path1:
        dir1.append(root.split(path1+"/")[1])
        lenfile1.append(len(files))
print(len(dir1))
print(len(lenfile1))
print(dir1[0:5])
print(lenfile1[0:5])

86
86
['HMNT0399_bis - 2017-06-16 19.11.30', 'HMNT0401 - 2017-06-02 15.22.36', 'HMNT0401_bis - 2017-06-02 16.04.57', 'HMNT0409_bis - 2017-07-16 06.00.19', 'HMNT0439 - 2017-06-17 23.17.22']
[31640, 33910, 37752, 44898, 38582]


In [238]:
path1 = '/media/visiopharm5/WDRed(backup)/qupath_mondor/encoded'
for root, dirs, files in os.walk(path1):
    if root != path1:
        dir1.append(root.split(path1+"/")[1])
        lenfile1.append(len(files))
print(len(dir1))
print(len(lenfile1))

143
143


In [239]:
path1 = '/media/visiopharm5/WDRed(backup)/qupath_mondor/tiles_20x_256'
for root, dirs, files in os.walk(path1):
    if root != path1:
        dir1.append(root.split(path1+"/")[1])
        lenfile1.append(len(files))
print(len(dir1))
print(len(lenfile1))

144
144


In [240]:
path1 = '/media/visiopharm5/WDRed(backup)/qupath_mondor/tmp'
for root, dirs, files in os.walk(path1):
    if root != path1:
        dir1.append(root.split(path1+"/")[1])
        lenfile1.append(len(files))
print(len(dir1))
print(len(lenfile1))

257
257


In [284]:
import pandas as pd
df1 = pd.DataFrame([dir1, lenfile1]).T
df1.columns = ["name", "ntiles"]
print(df1.shape)
display(df1.head(5))

(257, 2)


Unnamed: 0,name,ntiles
0,HMNT0399_bis - 2017-06-16 19.11.30,31640
1,HMNT0401 - 2017-06-02 15.22.36,33910
2,HMNT0401_bis - 2017-06-02 16.04.57,37752
3,HMNT0409_bis - 2017-07-16 06.00.19,44898
4,HMNT0439 - 2017-06-17 23.17.22,38582


In [285]:
list_h5 = []
for root, dirs, files in os.walk("/media/visiopharm5/WDRed(backup)/clam_extension/results/patches_mondor_tumor"):
    for file in files:
        list_h5.append(file.split(".h5")[0])
print(len(list_h5))
print(list_h5[:5])

62
['HMNT0135_bis - 2017-06-11 15.22.53', 'HMNT0280 - 2017-06-16 01.16.02', 'HMNT0071 - 2017-06-15 11.35.00', 'HMNT0071_bis - 2017-06-15 12.25.18', 'HMNT0073_bis - 2017-06-17 22.30.07']


In [286]:
df1.loc[df1[df1["name"].isin(list_h5)].index, 'h5'] = "yes"
print(df1.shape)
display(df1.head(5))

(257, 3)


Unnamed: 0,name,ntiles,h5
0,HMNT0399_bis - 2017-06-16 19.11.30,31640,yes
1,HMNT0401 - 2017-06-02 15.22.36,33910,yes
2,HMNT0401_bis - 2017-06-02 16.04.57,37752,yes
3,HMNT0409_bis - 2017-07-16 06.00.19,44898,
4,HMNT0439 - 2017-06-17 23.17.22,38582,yes


In [287]:
set(list_h5)-set(df[df1["h5"]=="yes"].name)

set()

In [272]:
path2 = '/media/visiopharm5/Storage/tiles'

dir2 = []
lenfile2 = []
for root, dirs, files in os.walk(path2):
    if root != path2:
        dir2.append(root.split(path2+"/")[1])
        lenfile2.append(len(files))
print(len(dir2))
print(len(lenfile2))
print(dir2[0:5])
print(lenfile2[0:5])

47
47
['HMNT0578_bis - 2017-06-11 06.12.53', 'HMNT0399_bis - 2017-06-16 19.11.30', 'HMNT0401 - 2017-06-02 15.22.36', 'HMNT0401_bis - 2017-06-02 16.04.57', 'HMNT0409_bis - 2017-07-16 06.00.19']
[20007, 31640, 33910, 37752, 44898]


In [274]:
path2 = '/media/visiopharm5/Qinghe/tiles'
for root, dirs, files in os.walk(path2):
    if root != path2:
        dir2.append(root.split(path2+"/")[1])
        lenfile2.append(len(files))
print(len(dir2))
print(len(lenfile2))

61
61


In [288]:
df2 = pd.DataFrame([dir2, lenfile2]).T
df2.columns = ["name", "ntiles_copied"]
print(df2.shape)
display(df2.head(5))

(61, 2)


Unnamed: 0,name,ntiles_copied
0,HMNT0578_bis - 2017-06-11 06.12.53,20007
1,HMNT0399_bis - 2017-06-16 19.11.30,31640
2,HMNT0401 - 2017-06-02 15.22.36,33910
3,HMNT0401_bis - 2017-06-02 16.04.57,37752
4,HMNT0409_bis - 2017-07-16 06.00.19,44898


In [289]:
df = df1.merge(df2, on="name", how = 'outer')
print(df.shape)
display(df.head(10))

(257, 4)


Unnamed: 0,name,ntiles,h5,ntiles_copied
0,HMNT0399_bis - 2017-06-16 19.11.30,31640,yes,31640
1,HMNT0401 - 2017-06-02 15.22.36,33910,yes,33910
2,HMNT0401_bis - 2017-06-02 16.04.57,37752,yes,37752
3,HMNT0409_bis - 2017-07-16 06.00.19,44898,,44898
4,HMNT0439 - 2017-06-17 23.17.22,38582,yes,38582
5,HMNT0439_bis - 2017-06-18 00.05.21,52450,yes,52450
6,HMNT0441_bis - 2017-06-04 15.48.14,80618,yes,80618
7,HMNT0454 - 2017-06-04 23.33.35,37674,,37674
8,HMNT0454_bis - 2017-06-05 00.11.21,33448,,33448
9,HMNT0455 - 2017-06-16 16.46.38,20582,,20582


In [185]:
dir11 = []
import re
for dirs in dir1:
    if dirs!= '/media/visiopharm5/WDRed(backup)/qupath_mondor/tmp' and dirs!= '/media/visiopharm5/WDRed(backup)/qupath_mondor/copying':
        dir11.append(re.split('/media/visiopharm5/WDRed\(backup\)/qupath_mondor/tmp/|/media/visiopharm5/WDRed\(backup\)/qupath_mondor/copying/', dirs)[1])

In [166]:
dir22 = []
import re
for dirs in dir2:
    if dirs!= '/media/visiopharm5/Storage/tiles' and dirs!='/media/visiopharm5/B489-8CF1' and dirs!='/media/visiopharm5/Qinghe':
        dir22.append(re.split('/media/visiopharm5/Storage/tiles/|/media/visiopharm5/B489\-8CF1/|/media/visiopharm5/Qinghe/', dirs)[1])

In [197]:
print(len(dir1))
print(len(dir2))

199
74


In [170]:
display(df2.head(5))
print(df1.shape)
print(df2.shape)

Unnamed: 0,name,ntiles
0,HMNT0102 - 2017-07-14 22.14.28,13904
1,HMNT0102_bis - 2017-07-14 23.08.05,35700
2,HMNT0110_bis - 2017-06-16 22.39.18,74556
3,HMNT0111 - 2017-06-17 19.47.09,52824
4,HMNT0111_bis - 2017-06-17 20.43.09,45326


(113, 2)
(102, 2)


In [144]:
df2[df2["name"]=="HMNT0574_bis - 2017-06-13 20.19.34"]

Unnamed: 0,name,ntiles
66,HMNT0574_bis - 2017-06-13 20.19.34,27140


In [142]:
df2.dtypes

name      object
ntiles    object
dtype: object

In [148]:
rm(df)

/bin/bash: -c: line 0: syntax error near unexpected token `df'
/bin/bash: -c: line 0: `rm (df)'


(113, 3)


Unnamed: 0,name,ntiles_x,ntiles_y
0,HMNT0574_bis - 2017-06-13 20.19.34,27140,27140.0
1,HMNT0620_bis - 2017-07-24 22.21.49,48054,48054.0
2,HMNT0672 - 2017-06-04 07.13.30,69180,20731.0
3,HMNT0672_bis - 2017-06-04 08.15.37,61078,
4,HMNT0689 - 2017-06-03 10.43.21,40706,


In [290]:
df.to_csv("~/Downloads/copied_tiles_info.csv")
# df1.to_csv("/media/visiopharm5/WDRed(backup)/qupath_mondor/npatches.csv")

In [161]:
# df2 = pd.DataFrame([dir22, lenfile2]).T
# df2.columns = ["name", "ntiles"]
# display(df2.head(5))

Unnamed: 0,name,ntiles
0,HMNT0102 - 2017-07-14 22.14.28,13904
1,HMNT0102_bis - 2017-07-14 23.08.05,35700
2,HMNT0110_bis - 2017-06-16 22.39.18,74556
3,HMNT0111 - 2017-06-17 19.47.09,52824
4,HMNT0111_bis - 2017-06-17 20.43.09,45326


In [162]:
df2.to_csv("/media/visiopharm5/WDRed(backup)/qupath_mondor/npatches.csv")

In [294]:
from tqdm.notebook import tqdm
tile_id = []
mask_id = []
path_tiles_qp = "/media/visiopharm5/WDRed(backup)/qupath_mondor/copying"
slide_id = ['HMNT2066_P734242_01_HES - 2018-11-01 16.13.44','HMNT2321_757692-05-HES','HMNT2387_P761471-A03-HES']
for name in tqdm(slide_id):
    tmp_tile = []
    tmp_mask = []
    for root, dirs, files in os.walk(os.path.join(path_tiles_qp, name)):
        if len(files) != 0:
            for file in tqdm(files):
                if file.endswith(".tif"):
                    tmp_tile.append(file)
                elif file.endswith(".png"):
                    tmp_mask.append(file)
            tile_id.append(tmp_tile)
            mask_id.append(tmp_mask)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=15288.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20900.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7300.0), HTML(value='')))





In [296]:
print(len(tile_id[1]))

10450


In [29]:
import pandas as pd
df = pd.read_csv("/media/visiopharm5/WDRed(backup)/qupath_mondor/npatches.csv", index_col=0)
print(df.shape)
display(df.head(5))

(259, 2)


Unnamed: 0,name,ntiles
0,HMNT0399_bis - 2017-06-16 19.11.30,31640
1,HMNT0401 - 2017-06-02 15.22.36,33910
2,HMNT0401_bis - 2017-06-02 16.04.57,37752
3,HMNT0409_bis - 2017-07-16 06.00.19,44898
4,HMNT0439 - 2017-06-17 23.17.22,38582


In [30]:
import os
slides = []
for root, dirs, files in os.walk("/media/visiopharm5/WDRed(backup)/clam_extension/results/patches_mondor_tumor"):
    for file in files:
        slides.append(file[:-3])

In [31]:
set(df.name.to_list()) - set(slides) #1

set()

In [32]:
set(slides) - set(df.name.to_list()) #7

set()