## Running image preprocessing (qptiff-split.ipynb on the second cohort

In [1]:
import tifffile as tff
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import numpy as np
from cv2 import imwrite
from PIL import Image
import pickle as pk

In [2]:
# get channel names
raw_ome = '../Data/20220509_Castleman_LN_MCD_1.qptiff'
tifim = tff.TiffFile(raw_ome)

markers=[]
for p in tifim.series[0].pages:
    tif_xml = p.tags['ImageDescription'].value
    tif_meta = ET.XML(tif_xml)
    marker = tif_meta.find("Biomarker").text
    markers.append(marker)

In [4]:
with open(r'../Data/CellSeg/channelNames_cohort2.txt', 'w') as fp:
    for item in markers:
        # write each item on a new line
        fp.write("%s\n" % item)
    print('Done')

Done


## Making smaller set of images to test CellSeg

### Splitting MCD1 images
4 regions, 1 row

In [2]:
raw_ome = '../Data/20220509_Castleman_LN_MCD_1.qptiff'
tifim = tff.TiffFile(raw_ome)
nregion = 4

In [3]:
tifim.pages[0].shape[1]/4

10032.0

In [6]:
# individual images
for p in tifim.series[0].pages:
    tif_xml = p.tags['ImageDescription'].value
    tif_meta = ET.XML(tif_xml)
    marker = tif_meta.find("Biomarker").text
    print(marker)
    p_im = p.asarray()
    out = p_im[ 0:500 , 0:500 ]
    imwrite('../Data/R1/region1/small/{}.tif'.format(marker), out)

DAPI
IRF4
CLEC9A
IRF8
TCF4
CD123
BCL-6
FOXP3
Ki67
CD25
CD40
CD10
CD11c
PAX-5
PD-1
CD1c
BCL-2
CD134
CD68
CD69
CD11b
ICOS
CD163
MPO
CD30
CD5
Podoplanin
HLA-DR
CD20
CD8
CD138
CD23
Fascin
CD45
CD45RA
Mac2Gal3
CD4
Granzyme B
CD21
CD3e
CD15
CD34
CD31
Vimentin


In [5]:
# stacked images
arrays = []
for p in tifim.series[0].pages:
    tif_xml = p.tags['ImageDescription'].value
    tif_meta = ET.XML(tif_xml)
    marker = tif_meta.find("Biomarker").text
    # markers.append(marker)
    p_im = p.asarray()
    out = p_im[ 0:500 , 0:500 ]
    arrays.append(out)

In [7]:
out = np.stack(arrays, axis=2)
out.shape

(500, 500, 44)

In [8]:
with tff.TiffWriter('../Data/R1/region1/small-stack/all-stacked.tif') as tif:
    tif.write(out)

In [6]:
# Making regional test images stacked with all markers

for i_region in range(nregion):
    arrays = []
    for p in tifim.series[0].pages:
        tif_xml = p.tags['ImageDescription'].value
        tif_meta = ET.XML(tif_xml)
        # print(tif_meta.find("Biomarker").text)
        # marker = tif_meta.find("Biomarker").text
        # markers.append(marker)
        p_im = p.asarray()
        out = p_im[ : , (10032*i_region):((10032*i_region + 10032)-1)]
        # out = p_im[2000:3000,2000:3000] # cropping image down for testing
        arrays.append(out)
    out = np.stack(arrays, axis=0)
    out = np.expand_dims(out, axis=1)
    print(out.shape)
    with tff.TiffWriter('../Data/CellSeg/input/MCD1_regions/MCD1_reg{}_input.tif'.format(i_region+1)) as tif:
        tif.write(out)

(44, 1, 9504, 10031)
(44, 1, 9504, 10031)
(44, 1, 9504, 10031)
(44, 1, 9504, 10031)


### Splitting MCD3 images

3 regions, 1 row

In [11]:
raw_ome = '../Data/20220513_Castleman_LN_MCD3.qptiff'
tifim = tff.TiffFile(raw_ome)
nregion = 3

In [8]:
tifim.pages[0].shape[1]/3

10026.666666666666

In [12]:
for i_region in range(nregion):
    arrays = []
    for p in tifim.series[0].pages:
        tif_xml = p.tags['ImageDescription'].value
        tif_meta = ET.XML(tif_xml)
        p_im = p.asarray()
        out = p_im[ : , (10026*i_region):((10026*i_region + 10026)-1)]
        arrays.append(out)
    out = np.stack(arrays, axis=0)
    out = np.expand_dims(out, axis=1)
    print(out.shape)
    with tff.TiffWriter('../Data/CellSeg/input/MCD3_regions/MCD3_reg{}_input.tif'.format(i_region+1)) as tif:
        tif.write(out)

(44, 1, 9504, 10025)
(44, 1, 9504, 10025)
(44, 1, 9504, 10025)


### Splitting HVCD1 images

4 regions, 1 row

In [13]:
raw_ome = '../Data/20220515_Castleman_LN_HVCD1.qptiff'
tifim = tff.TiffFile(raw_ome)
nregion = 4

In [10]:
tifim.pages[0].shape[1]/4

10032.0

In [14]:
# for CellSeg
for i_region in range(nregion):
    arrays = []
    for p in tifim.series[0].pages:
        tif_xml = p.tags['ImageDescription'].value
        tif_meta = ET.XML(tif_xml)
        p_im = p.asarray()
        out = p_im[ : , (10032*i_region):((10032*i_region + 10032)-1)]
        arrays.append(out)
    out = np.stack(arrays, axis=0)
    out = np.expand_dims(out, axis=1)
    print(out.shape)
    with tff.TiffWriter('../Data/CellSeg/input/HVCD1_regions/HVCD1_reg{}_input.tif'.format(i_region+1)) as tif:
        tif.write(out)

(44, 1, 9504, 10031)
(44, 1, 9504, 10031)
(44, 1, 9504, 10031)
(44, 1, 9504, 10031)
