In [None]:
from fastai.vision.all import *
import pandas as pd
import numpy as np
from tqdm.autonotebook import tqdm
import imageio

In [None]:
# Read the df_negative.csv metadata table.
df = pd.read_csv('../input/hpa-public-data-negative-sample-dataset-2/df_negative.csv')
# 'fname' is whatever follows the last '/' in each 'Image' entry.
df['fname'] = df['Image'].map(lambda image_ref: image_ref.rsplit('/', 1)[-1])
df.head()

In [None]:
# Directory containing the per-channel PNGs; other cells open files as
# path/f'{image_id}_red.png', '_yellow.png' and '_blue.png'.
path = Path('../input/hpa-public-data-negative-sample-dataset-2/negative_cells')

In [None]:
def get_size(fname):
    """Return the size of an image's blue-channel PNG, or -1 if it cannot be loaded.

    Args:
        fname: Image identifier; the file ``{fname}_blue.png`` under ``path``
            is opened.

    Returns:
        The first element of the loaded image's ``shape`` (presumably the
        height -- confirm against fastai's ``PILImage.shape``), or ``-1`` when
        loading fails.
    """
    try:
        return PILImage.create(path/f'{fname}_blue.png').shape[0]
    except Exception:
        # Deliberately best-effort: any load failure is reported as -1 so the
        # caller can filter those rows out. `Exception` (not a bare `except:`)
        # keeps KeyboardInterrupt/SystemExit propagating, so interrupting the
        # kernel during the full-dataframe scan actually stops it.
        return -1

get_size('1615_A8_5')

In [None]:
# Probe every image once; get_size yields -1 for files it could not load.
df['size'] = [get_size(image_name) for image_name in df['fname']]
df.head()

In [None]:
# Count how many rows share each 'size' value; the -1 bucket is the rows
# whose image get_size could not load.
df['size'].value_counts()

In [None]:
# Keep only the rows whose image was loaded successfully (size is not -1).
df = df.loc[df['size'].ne(-1)]

In [None]:
# Replace the index with a fresh 0..n-1 range, discarding the old labels.
df = df.reset_index(drop=True)

In [None]:
# Number of rows currently in df.
df.shape[0]

In [None]:
# Install the segmentation dependencies from local paths under ../input
# (no package index is named); -q keeps pip's output quiet.
# The pycocotools wheel is tagged cp37 / linux_x86_64, so it only matches a
# CPython 3.7 kernel on 64-bit Linux.
!pip install -q "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
!pip install -q "../input/hpapytorchzoozip/pytorch_zoo-master"
!pip install -q "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"

In [None]:
def build_image_names(image_id: str) -> list:
    """Build the red, yellow and blue channel file paths for one image.

    Args:
        image_id: Image identifier used as the file-name prefix.

    Returns:
        Three one-element lists, in the order red, yellow, blue, each holding
        the string path ``path/{image_id}_<colour>.png``.
    """
    channel_colours = ('red', 'yellow', 'blue')
    return [[str(path/f'{image_id}_{colour}.png')] for colour in channel_colours]
build_image_names('738_G1_3')

In [None]:
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

# Weight files handed to the segmentator (nuclei model first, cell model second).
NUC_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth'
CELL_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth'

# Keyword settings forwarded unchanged to CellSegmentator.
segmentator_options = dict(
    scale_factor=0.25,
    device='cuda',
    padding=True,
    multi_channel_model=True,
)
segmentator = cellsegmentator.CellSegmentator(NUC_MODEL, CELL_MODEL, **segmentator_options)

In [None]:
# Shuffle all rows (frac=1 samples the whole frame); the fixed random_state
# makes the order repeatable across runs.
sdf = df.sample(frac=1, random_state=42)
# Show how many rows there are of each 'size' value.
sdf['size'].value_counts()

In [None]:
# One frame per distinct image size, each an independent copy with its own
# 0..n-1 index, in order of first appearance of the size in sdf.
sub_dfs = [
    sdf[sdf['size'] == dim].copy().reset_index(drop=True)
    for dim in sdf['size'].unique()
]

In [None]:
# Output directory names, created relative to the working directory.
cell_dir = 'cells'
nucl_dir = 'nucls'
# exist_ok=True makes re-running this cell harmless when they already exist.
for out_dir in (cell_dir, nucl_dir):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Run the segmentator over one batch per image-size group.
bs = 8  # number of images passed to the segmentator per call
for sub in sub_dfs:
    # Each frame in sub_dfs has a fresh 0..n-1 index, so label 0 is its first row.
    print(f'Starting prediction for image size: {sub["size"].loc[0]}')
    for start in range(0, len(sub), bs):
        # Clamp so the final, possibly shorter, batch stops at the last row.
        end = min(start + bs, len(sub))

        image_ids = [sub['fname'].loc[row] for row in range(start, end)]
        # One [[red], [yellow], [blue]] path triple per image in the batch.
        images = [build_image_names(image_id=image_id) for image_id in image_ids]

        # np.stack yields shape (batch, 3, 1). Drop only the trailing singleton
        # axis: an axis-less squeeze() would also collapse the batch axis when
        # the batch holds a single image, turning each channel entry into a
        # bare string instead of a one-element list of paths.
        images = np.stack(images).squeeze(axis=-1)
        # Transpose to (3, batch): one list of paths per channel.
        images = np.transpose(images).tolist()

        # Index 2 is the blue-channel path list built by build_image_names.
        nuc_segmentations = segmentator.pred_nuclei(images[2])
        cell_segmentations = segmentator.pred_cells(images)

        print('Worked OK!')
        # Only the first batch of each size group is run; this leaves the
        # inner loop, and the outer loop moves on to the next size.
        break
