In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from skimage import io
import os
from tqdm import tqdm_notebook
from PIL import Image
from PIL.TiffTags import TAGS
import matplotlib.pyplot as plt
%matplotlib inline
%cd /media/data/DeepFLaSH

In [2]:
from scipy import ndimage
from skimage.feature import peak_local_max
from skimage.segmentation import relabel_sequential
from skimage.measure import regionprops_table, label

# https://scikit-image.org/docs/stable/api/skimage.measure.html?highlight=measure%20perimeter#skimage.measure.regionprops
def measure_rois_io(mask, image,file_id, threshold=0.5, min_pixel=30, 
                   properties = ['mean_intensity', 
                                 #'max_intensity', 
                                 #'min_intensity', 
                                 #'filled_image',
                                 'label',
                                 #'centroid',
                                 'area',
                                 #'bbox'
                                ]):
    """Threshold ``mask``, label its connected regions and measure them on ``image``.

    Parameters
    ----------
    mask : ndarray
        2-D array, or 3-D array whose third axis is squeezed away.
    image : ndarray
        Intensity image handed to ``regionprops_table``.
    file_id
        Value written to the ``'Nummer'`` column of every returned row.
    threshold : float
        Pixels with ``mask > threshold`` count as foreground.
    min_pixel : int
        Labels covering fewer than ``min_pixel`` pixels are discarded.
    properties : list of str
        Region properties requested from ``regionprops_table``.

    Returns
    -------
    pandas.DataFrame
        One row per remaining region plus the ``'Nummer'`` column. When no
        region remains, a single all-NaN row (columns = ``properties``) is
        returned so the file id still shows up in the result.
    """
    mask_2d = np.squeeze(mask, axis=2) if mask.ndim == 3 else mask

    # Binarise, then label with 8-connectivity (cf. Falk p.13).
    binary = (mask_2d > threshold).astype(int)
    labelled = label(binary, connectivity=2)

    # Zero out every label whose pixel count is below min_pixel.
    label_ids, pixel_counts = np.unique(labelled, return_counts=True)
    too_small = label_ids[pixel_counts < min_pixel]
    labelled[np.isin(labelled, too_small)] = 0

    # Close the gaps left by the removed labels so ids run 1..n again.
    labelled = relabel_sequential(labelled, offset=1)[0]

    if labelled.max() > 0:
        df = pd.DataFrame(regionprops_table(labelled, image, properties=properties))
    else:
        df = pd.DataFrame(np.nan, index=[0], columns=properties)

    df['Nummer'] = file_id
    return df

## Global Settings

In [3]:
# Notebook-wide settings; the cells below read these names directly.
CHANNELS_IMG = 1  # not referenced in the visible cells of this notebook
CODER = ['staple']  # not referenced in the visible cells of this notebook
MASK_FOLDER = 'pred_cv'  # root folder of the predicted masks (prefix of mask_path)
DATA_FOLDER = '01_data'  # folder the '<Nummer>_<IMAGE>.tif' images are read from
NEUN_FOLDER = 'data/labels_neun'  # parent folder of the NeuN mask subfolders
IMAGE = 'red'  # file-name suffix of the images that are measured
NEUN = 'NeuN'  # file-name suffix of the NeuN masks
MASK = 'cFOS'  # file-name suffix of the predicted masks ('<Nummer>_<MASK>.png') and of the ROI csv files
MIN_PIXEL = 30  # passed as min_pixel when measuring the predicted masks
META = None  # optional image path; when set, its 'XResolution' tag yields conv_factor
LAB = 'wue_all'  # sub-folder below MASK_FOLDER and '_bio_eval'; also the output file prefix

In [5]:
# Assignment table; 'Nummer' is converted to a zero-padded string of at least
# four characters so it can be used directly in file names.
assignment = pd.read_excel(
    'data/Zuordnung_corr.xlsx',
    converters={'Nummer': lambda value: str(value).zfill(4)},
)

# Folder whose non-hidden sub-directories are treated as the ensembles.
mask_path = MASK_FOLDER+'/{}/test/binary/'.format(LAB) #data/labels'
ens_list = sorted(entry for entry in next(os.walk(mask_path))[1] if not entry.startswith('.'))
ens_list

['ens_w27a', 'ens_w27b', 'ens_w27c']

In [6]:
# Parallel lists that the evaluation loop walks with zip(): entry i gives the NeuN
# mask subfolder, the 'Area' value used to filter the assignment table, and the
# subarea label written to the 'subarea' column and the output file names.
# NOTE(review): the CA1 and CA3 entries read their NeuN masks from 'DG_whole' —
# confirm that this folder really holds the CA1/CA3 masks.
NEUN_SUBFOLDER = ['DG_whole', 'DG_whole', 'DG_whole', 'DG_supra', 'DG_infra']
AREA = ['CA1', 'CA3', 'DG','DG', 'DG']
SUBAREA = ['CA1', 'CA3', 'DG','DG_supra', 'DG_infra']

In [7]:
# For every (NeuN subfolder, area, subarea) triple: select the matching files,
# measure the NeuN region, then measure the predicted cFOS ROIs of every model
# found below `mask_path`, restricted to the NeuN region. Results are written
# per subarea and once more as one combined table.
out_dir = os.path.join('_bio_eval', LAB)
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing folders

df_all_list = []
for subfold, area, subarea in zip(NEUN_SUBFOLDER, AREA, SUBAREA):
    print(subfold, area, subarea)
    df_zu = assignment[(assignment['Genotyp']=='WT') &
                       (assignment['region']=='dHC') &
                       (assignment['Area'].isin([area])) &
                       (assignment['Experiment'].isin([1,2,3,4])) &
                       (assignment['Cross-coder Training'].isna()) &
                       (assignment['Ausschluss von Analyse'].isna()) &
                       (assignment['broken'].isna())]
    file_ids = df_zu['Nummer'].tolist()

    image_list = [io.imread(os.path.join(DATA_FOLDER, fid + '_' + IMAGE + '.tif'), as_gray=True)
                  for fid in file_ids]
    neun_list = [io.imread(os.path.join(NEUN_FOLDER, subfold, fid + '_' + NEUN + '.tif'), as_gray=True)
                 for fid in file_ids]

    # NeuN regions, measured with the default min_pixel of measure_rois_io.
    df_neun_list = [measure_rois_io(msk, img, fid) for msk, img, fid in zip(neun_list, image_list, file_ids)]
    df_neun = pd.concat(df_neun_list)
    df_neun['Neun_Area'] = df_neun['area']
    df_neun.to_csv(os.path.join(out_dir, LAB+'_'+NEUN+'_'+subarea+'_results.csv'), index=False)

    # One NeuN area per image: a NeuN mask with several regions yields several
    # rows per 'Nummer', which would duplicate every cFOS ROI in the merge
    # below. min_count=1 keeps NaN for images without any NeuN region.
    neun_area = df_neun.groupby('Nummer', as_index=False)['Neun_Area'].sum(min_count=1)

    df_cfos_list = []
    for ens in tqdm_notebook(ens_list):
        ens_path = os.path.join(mask_path, ens)
        fold_list = [x for x in next(os.walk(ens_path))[1] if not x.startswith('.')]
        for fold in fold_list:
            # Only the first non-hidden sub-directory of each fold is evaluated.
            model = [x for x in next(os.walk(os.path.join(ens_path, fold)))[1] if not x.startswith('.')][0]
            model_path = os.path.join(ens_path, fold, model)
            mask_list = [io.imread(os.path.join(model_path, fid + '_' + MASK + '.png'), as_gray=True).astype('int')
                         for fid in file_ids]
            # Discard predictions outside the NeuN region.
            for msk, neun in zip(mask_list, neun_list):
                msk[neun==0]=0
            df_list = [measure_rois_io(msk, img, fid, min_pixel=MIN_PIXEL)
                       for msk, img, fid in zip(mask_list, image_list, file_ids)]
            df_tmp = pd.concat(df_list)
            df_tmp['fold'] = fold
            df_tmp['unet'] = model
            df_tmp['ens'] = ens
            df_cfos_list.append(df_tmp)

    df_cfos = pd.concat(df_cfos_list)
    df_cfos['subarea'] = subarea
    df_cfos = pd.merge(df_cfos, neun_area, how='left', on='Nummer')
    df_cfos.to_csv(os.path.join(out_dir, LAB+'_'+MASK+'_'+subarea+'_ROIs.csv'), index=False)
    df_all_list.append(df_cfos)

df_all = pd.concat(df_all_list)
df_all.to_csv(os.path.join(out_dir, LAB+'_'+MASK+'_ROIs.csv'), index=False)

DG_whole CA1 CA1


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


DG_whole CA3 CA3


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


DG_whole DG DG


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


DG_supra DG DG_supra


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


DG_infra DG DG_infra


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [12]:
# Show a bounded preview instead of rendering the whole combined ROI table.
df_all.head(10)

Unnamed: 0,mean_intensity,label,area,Nummer,fold,unet,ens,subarea,Neun_Area
0,0.095823,1.0,46.0,0000,bin_average,ens_10,ens_10,CA1,95877
1,0.086902,2.0,50.0,0000,bin_average,ens_10,ens_10,CA1,95877
2,0.384394,3.0,244.0,0000,bin_average,ens_10,ens_10,CA1,95877
3,0.088759,4.0,131.0,0000,bin_average,ens_10,ens_10,CA1,95877
4,0.091935,5.0,97.0,0000,bin_average,ens_10,ens_10,CA1,95877
5,0.110026,6.0,230.0,0000,bin_average,ens_10,ens_10,CA1,95877
6,0.103241,7.0,49.0,0000,bin_average,ens_10,ens_10,CA1,95877
7,0.092407,8.0,94.0,0000,bin_average,ens_10,ens_10,CA1,95877
8,0.092285,9.0,92.0,0000,bin_average,ens_10,ens_10,CA1,95877
9,0.088347,10.0,140.0,0000,bin_average,ens_10,ens_10,CA1,95877


# Old: single-subarea version of the analysis above

In [29]:
# Legacy selection of the files to evaluate: wild-type, dHC region, experiments 1-4,
# with no entry in the 'Cross-coder Training', 'Ausschluss von Analyse' and 'broken'
# columns.
# NOTE(review): AREA is passed to isin() as a whole, so with the list defined in the
# settings cell this selects all listed areas at once, unlike the per-area loop.
df_zu = assignment[(assignment['Genotyp']=='WT') & 
                       (assignment['region']=='dHC') & 
                       (assignment['Area'].isin(AREA)) & 
                       (assignment['Experiment'].isin([1,2,3,4])) &
                       (assignment['Cross-coder Training'].isna()) & 
                       (assignment['Ausschluss von Analyse'].isna()) &
                       (assignment['broken'].isna())]

file_ids = df_zu['Nummer'].tolist()
len(file_ids)

93

### Load and adjust data

Load images and NeuN masks

In [30]:
# Read one image and one NeuN mask per file id, both with as_gray=True.
# NOTE(review): NEUN_SUBFOLDER is used here as a single path component, but the
# settings cell defines it as a list, which os.path.join rejects — presumably this
# cell predates that change; confirm before re-running.
image_list = [io.imread(os.path.join(DATA_FOLDER, img_name), as_gray=True) for 
              img_name in [s + '_' + IMAGE + '.tif' for s in file_ids]]
neun_list = [io.imread(os.path.join(NEUN_FOLDER, NEUN_SUBFOLDER, img_name), as_gray=True) for 
              img_name in [s + '_' + NEUN + '.tif' for s in file_ids]]
len(image_list)

93

Calc NeuN Area

In [31]:
# Measure the NeuN regions of every image and store them as csv.
# The empty-list assignment is overwritten by the next line and has no effect.
df_neun_list = []
df_neun_list = [measure_rois_io(msk, img, fid) for msk, img, fid in zip(neun_list, image_list, file_ids)]
df_neun = pd.concat(df_neun_list)
# Copy of 'area' under the name that is merged into the cFOS table later on.
df_neun['Neun_Area'] = df_neun['area']
# NOTE(review): SUBAREA is concatenated as a string here, but the settings cell
# defines it as a list — presumably this cell predates that change; confirm.
df_neun.to_csv(os.path.join('_bio_eval',LAB,LAB+'_'+NEUN+'_'+SUBAREA+'_results.csv'), index=False)

In [32]:
# Preview of the NeuN measurements (first rows only).
df_neun.head()

Unnamed: 0,mean_intensity,label,area,Nummer,Neun_Area
0,0.039498,1,58921,2,58921
0,0.061832,1,89510,5,89510
0,0.044552,1,93380,14,93380
0,0.077676,1,76679,19,76679
0,0.06413,1,72304,22,72304


### Loop over Models

In [36]:
# Legacy version of the model loop: for every ensemble and fold, read the predicted
# masks of the first model sub-directory, restrict them to the NeuN region and
# measure the remaining ROIs.
# NOTE(review): SUBAREA is used below as a single string (column value and file
# name), but the settings cell defines it as a list — presumably this cell predates
# that change; confirm before re-running.
df_cfos_list = []
for ens in tqdm_notebook(ens_list):
    fold_list = [x for x in next(os.walk(os.path.join(mask_path, ens)))[1] if not x.startswith('.')]
    # First non-hidden sub-directory of each fold is taken as the model.
    model_list = [[x for x in next(os.walk(os.path.join(mask_path, ens, fold)))[1] if not x.startswith('.')][0] for fold in fold_list]
    for fold, model in zip(fold_list, model_list):
        model_path = os.path.join(mask_path, ens, fold, model)
        mask_list = [io.imread(os.path.join(model_path, x), as_gray=True).astype('int') for x in [s + '_' + MASK + '.png' for s in file_ids]]
        # Discard predictions outside the NeuN region.
        for msk, neun in zip(mask_list, neun_list):
            msk[neun==0]=0
        df_list = [measure_rois_io(msk, img, fid, min_pixel=MIN_PIXEL) for msk, img, fid in zip(mask_list, image_list, file_ids)]
        df_tmp = pd.concat(df_list)
        df_tmp['fold'] = fold
        df_tmp['unet'] = model
        df_tmp['ens'] = ens
        df_cfos_list += [df_tmp]
df_cfos = pd.concat(df_cfos_list)
df_cfos['subarea'] = SUBAREA
# Attach the NeuN area of the same image to every ROI row.
df_cfos = pd.merge(df_cfos, df_neun[['Nummer','Neun_Area']], how='left', on='Nummer')
df_cfos.to_csv(os.path.join('_bio_eval',LAB,LAB+'_'+MASK+'_'+SUBAREA+'_ROIs.csv'), index=False)

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

KeyboardInterrupt: 

In [35]:
# Preview of the cFOS ROI table (first rows only). The disabled line repeats the
# NeuN-area merge that the model-loop cell already performs.
#df_cfos = pd.merge(df_cfos, df_neun[['Nummer','Neun_Area']], how='left', on='Nummer')
df_cfos.head()

Unnamed: 0,mean_intensity,label,area,Nummer,fold,unet,ens,subarea,Neun_Area
0,0.151313,1,159,2,bin_average,ens_10,ens_10,DG_supra,58921
1,0.092782,2,138,2,bin_average,ens_10,ens_10,DG_supra,58921
2,0.328444,3,154,2,bin_average,ens_10,ens_10,DG_supra,58921
3,0.078659,4,86,2,bin_average,ens_10,ens_10,DG_supra,58921
4,0.143224,5,180,2,bin_average,ens_10,ens_10,DG_supra,58921


In [None]:
# Write the cFOS ROI table to csv (same target path as in the model-loop cell).
# NOTE(review): SUBAREA is concatenated as a string, but the settings cell defines
# it as a list — confirm before re-running.
df_cfos.to_csv(os.path.join('_bio_eval',LAB,LAB+'_'+MASK+'_'+SUBAREA+'_ROIs.csv'), index=False)

### Check results

In [None]:
# Spot check: all ROI rows of image '1048' for one specific model.
df_cfos[(df_cfos.Nummer=='1048') & (df_cfos.unet=='f27c_sc_cyc_mue_cFOS_staple_5.0010.h5')]#.groupby(['Nummer', 'unet']).agg(['mean', 'count'])

In [None]:
# Mean and row count per image and model. The aggregation is restricted to the
# numeric columns because 'mean' is not defined for the string columns
# (fold, ens, subarea) and raises on current pandas versions.
numeric_cols = df_cfos.select_dtypes(include='number').columns.tolist()
df_cfos.groupby(['Nummer', 'unet'])[numeric_cols].agg(['mean', 'count'])

In [None]:
# Conversion factor taken from the 'XResolution' tag of the image at META;
# stays at 1.0 when no META image is configured.
conv_factor = 1.
if META is not None:
    # The context manager closes the file handle once the tags have been copied.
    with Image.open(META) as img:
        # Tag ids without a registered name keep their numeric id as key
        # instead of raising KeyError.
        meta_dict = {TAGS.get(key, key): img.tag[key] for key in img.tag.keys()}
    # 'XResolution' is read as a (numerator, denominator) pair.
    conv_factor = meta_dict['XResolution'][0][0]/meta_dict['XResolution'][0][1]
print(conv_factor)

## Pad NeuN Mask

In [None]:
# Zero-pad every NeuN mask so that it reaches the image size. The amount of
# padding is derived from the first image / first mask only and applied to all.
h_diff = image_list[0].shape[0]-neun_list[0].shape[0]
w_diff = image_list[0].shape[1]-neun_list[0].shape[1]
h_add = h_diff//2
w_add = w_diff//2
# The trailing pad takes the remainder, so an odd size difference no longer
# leaves the mask one pixel short of the image.
neun_list = [np.pad(n, ((h_add, h_diff-h_add),(w_add, w_diff-w_add)), 'constant', constant_values=0) for n in neun_list]