In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#import
import javabridge
import bioformats
import pickle
import numpy as np
import matplotlib.pyplot as plt
import re
import scipy
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path

from skimage import exposure
from skimage import filters
from skimage import exposure
from skimage.transform import resize
from skimage.feature import register_translation

# Start the Java VM with the Bio-Formats jars on its class path; this has to
# happen before any bioformats.load_image(...) call further down.
# NOTE(review): no matching javabridge.kill_vm() is visible in this section --
# confirm the VM is shut down once the notebook is finished.
javabridge.start_vm(class_path=bioformats.JARS)

import tifffile
from skimage.transform import resize
from tifffile import imsave
from PIL import Image

import fastai
from fastai.vision import *
from fastai.callbacks import *
from fastai.utils.mem import *

from torchvision.models import vgg16_bn
from  functions_super_res_create_training_im import *

Paths where the different images can be found:

In [None]:

# Root folder of the training data; every other location is derived from it.
path = '../../../../../SCRATCH2/marvande/data/train/'

# Whole-slide low-resolution (LR) scan.
path_lr_whole = f'{path}LR/0013_Scan1.qptiff'

# Output folder for the *resized* HR phenotype patches. Despite living under
# HR/, these resized patches are what the notebook uses as LR training data.
path_lr = f'{path}HR/HR_patches_resized/tiff_files/'

# Input folder holding the original HR phenotype patches.
path_hr = f'{path}HR/HR_patches_original/'

# Output folder for the HR patches stored in 6-channel form
# (the source is 6 phenotype images with 3 channels each).
path_save_hr = f'{path}HR/HR_patches_train/tiff_files/'

### Exploring the data: 
Take a look at a few images and their shapes: 

In [None]:
# Load three example images so we can compare their shapes:
#   - the whole LR scan,
#   - one raw HR patch (.im3),
#   - one HR phenotype ("path view") patch (.tif).
whole_lr_scan = bioformats.load_image(path_lr_whole)
hr_patch = bioformats.load_image(f'{path_hr}im3/0013_[39667,16250].im3')
hr_patho_patch = bioformats.load_image(
    f'{path_hr}0124_[43058,10798]_CK_OPAL690_path_view.tif')

# Report the array shape of each loaded image.
print(f'Whole LR scan shape:{whole_lr_scan.shape}')
print(f'HR patch shape:{hr_patch.shape}')
print(f'HR patho patch shape:{hr_patho_patch.shape}')

Resize one phenotype patch to have a look at it:

In [None]:
# `resize` has to be imported again here, otherwise the call below fails.
# NOTE(review): presumably one of the star-imports at the top of the notebook
# shadows skimage's `resize` -- confirm which one.
from skimage.transform import resize

# Load one HR phenotype patch; note that this rebinds `hr_patch`.
hr_patch = bioformats.load_image(
    f'{path_hr}0124_[43058,10798]_CK_OPAL690_path_view.tif')

# Show channel 1 of the patch at its original resolution.
print('One channel of HR patho patch:')
print(f'HR patch shape:{hr_patch.shape}')
plt.imshow(hr_patch[:, :, 1])
plt.show()

print('')

# Downscale to the LR patch size of (500, 669) and show the same channel.
hr_patch_resized = resize(hr_patch, (500, 669), anti_aliasing=True)
print(f'Resized HR patch shape:{hr_patch_resized.shape}')
plt.imshow(hr_patch_resized[:, :, 1])

No visible difference. Looks good. 

### Creating our training data: 
We will convert the HR phenotype patches (6 phenotype images with 3 channels each) into images of 6 channels (one per phenotype). We will create two training datasets for our model: 
- resized patches (LR) of size (500, 669, 6)
- original patches (HR) of size (500, 669, 6)

We wrote some functions for that (see `functions_super_res_create_training_im.py`) and will test them first to see if they behave properly. 

#### Test `correct_file()`: 
a function that tests whether a file has the location, patient and phenotype we want. 

In [None]:
#uncomment if you want to see the signature:
#?correct_file

In [None]:
# Reference patient / location / phenotypes the candidate file names are
# checked against.
phenotypes = ['CD4', 'CK', 'DAPI', 'CD3', 'FoxP3', 'CD8']
patient = '0013'
location = '[39667,16250]'

# (file name, expected result of correct_file) pairs.
correct_file_cases = [
    # phenotype not in the list -> rejected
    ('0013_[39667,16250]_Autofluorescence_path_view.tif', False),
    # patient, location and phenotype all match -> accepted
    ('0013_[39667,16250]_CD4_Rhod_path_view.tif', True),
    # wrong patient -> rejected
    ('0015_[39667,16250]_CD4_Rhod_path_view.tif', False),
    # wrong location -> rejected
    ('0013_[39660,16253]_CD4_Rhod_path_view.tif', False),
]

# Some checks for our function:
for candidate_name, should_match in correct_file_cases:
    assert (correct_file(location, patient, phenotypes,
                         candidate_name) == should_match)

#### Test `multichannel_phenotype()`: 
a function that creates a 6-channel image for the location of an HR patch,
where each channel is a phenotype output of inform. We plot an image with all six of its channels to have a look. 

In [None]:
#uncomment if you want to see the signature:
#?multichannel_phenotype

In [None]:
# Try multichannel_phenotype on a single patch: patient 0124 at
# location [43058,10798], one channel per phenotype listed below.
patient, location = '0124', '[43058,10798]'
phenotypes = ['CD4', 'CK', 'DAPI', 'CD3', 'FoxP3', 'CD8']
new_size = (500, 669, 3)

# Build the multi-channel image for that patch ...
arr = multichannel_phenotype(
    patient=patient,
    location=location,
    phenotypes=phenotypes,
    path_hr=path_hr,
    path_lr=path_lr,
    new_size=new_size,
)

# ... and plot its channels, labelled with the phenotype names.
show_6_chann_phen(arr, phenotypes)

Attention: somehow bioformats is unable to read all 6 channels, use tifffile.imread (see above)

#### Create the LR and HR training data:

In [None]:
# Create LR train data.
# Collect the names of all regular files (sub-directories are skipped) that
# sit directly inside the HR patch folder.
with os.scandir(path_hr) as entries:
    files = [item.name for item in entries if item.is_file()]

# The first four characters of every file name are used as the patient id.
patients = np.unique([name[:4] for name in files])
print('Patients:', patients)

phenotypes = ['CD4', 'CK', 'DAPI', 'CD3', 'FoxP3', 'CD8']
new_size = (500, 669, 3)

# Resized variant of the patches; path_lr is the folder reserved for the
# resized HR phenotype patches.
create_train_data(
    patients,
    phenotypes,
    path_hr=path_hr,
    path_lr=path_lr,
    resize_size=new_size,
)

In [None]:
# Create HR train data.
# Relies on `files` (the listing of path_hr) built in the LR cell, so that
# cell has to be run first.
patients = np.unique([name[:4] for name in files])
print('Patients:', patients)

phenotypes = ['CD4', 'CK', 'DAPI', 'CD3', 'FoxP3', 'CD8']

# Same call as for the LR data, but with resizing switched off and the
# HR training folder passed as the `path_lr` argument.
create_train_data(
    patients=patients,
    phenotypes=phenotypes,
    path_hr=path_hr,
    path_lr=path_save_hr,
    to_resize=False,
)