In [None]:
!pip install pycocotools

In [None]:
#https://www.kaggle.com/frlemarchand/generate-masks-from-weak-image-level-labels

import numpy as np
import pandas as pd
import cv2, os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import base64
from pycocotools import _mask as coco_mask
import typing as t
import zlib


In [None]:
def get_binary_mask(img, blur_ksize=25, close_ksize=40):
    '''
    Build a binary foreground mask from a colour image.

    The image is Gaussian-blurred, converted to grayscale, thresholded
    with Otsu's method and finally cleaned up with a morphological
    closing.

    Parameters
    ----------
    img : np.ndarray
        Colour image to segment. Presumably 8-bit, as produced by
        cv2.imread -- TODO confirm for other callers.
    blur_ksize : int, optional
        Side length of the square Gaussian kernel. OpenCV requires it to
        be odd and positive. Defaults to 25.
    close_ksize : int, optional
        Side length of the square structuring element used for the
        closing. Defaults to 40.

    Returns
    -------
    np.ndarray
        Single-channel mask whose pixels are either 0 or 255.

    Raises
    ------
    ValueError
        If `img` is None (which is what cv2.imread returns when a file
        cannot be read).
    '''
    if img is None:
        raise ValueError(
            "get_binary_mask received None instead of an image "
            "(cv2.imread returns None when the file cannot be read)")

    blurred_img = cv2.GaussianBlur(img, (blur_ksize, blur_ksize), 0)
    # NOTE(review): the RGBA conversion code is kept so existing masks do not
    # change. Callers in this notebook pass cv2.imread output, which is in
    # BGR order -- confirm whether cv2.COLOR_BGR2GRAY was intended.
    gray_img = cv2.cvtColor(blurred_img, cv2.COLOR_RGBA2GRAY)
    # With THRESH_OTSU the threshold argument (0) is ignored and the
    # automatically chosen value is returned first; only the mask is needed.
    _, otsu = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Closing fills holes and gaps smaller than the structuring element.
    kernel = np.ones((close_ksize, close_ksize), np.uint8)
    closed_mask = cv2.morphologyEx(otsu, cv2.MORPH_CLOSE, kernel)
    return closed_mask


def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  The mask is RLE-encoded with the COCO mask API, the RLE counts are
  zlib-compressed and the result is base64-encoded.

  Args:
    mask: boolean array that is 2-D once singleton dimensions have been
      squeezed out.

  Returns:
    The value produced by `base64.b64encode`. Note that, despite the
    `t.Text` annotation, this is a `bytes` object holding ASCII
    characters; call `.decode("ascii")` on it to obtain a `str`.

  Raises:
    ValueError: if `mask` does not have a boolean dtype, or is not 2-D
      after squeezing.
  """

  # check input mask --
  # `np.bool` was a deprecated alias that no longer exists in NumPy >= 1.24;
  # `np.bool_` is the boolean scalar type in every NumPy version.
  if mask.dtype != np.bool_:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: `"%s" % mask.shape` would treat the shape
    # itself as the argument tuple and raise TypeError for any rank != 1.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  # (height, width, 1) uint8 array in Fortran (column-major) order.
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str

In [None]:
# Read the training CSV and preview its first rows.
df = pd.read_csv(
    '../input/hpa-single-cell-image-classification/train.csv'
)
df.head()

In [None]:
# Load the image of the first training row and preview it.
img = cv2.imread('../input/hpaimage512-data/TarName/train/{}.jpg'.format(df.ID[0]))
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError(
        'could not read the training image for ID {}'.format(df.ID[0]))
# cv2.imread yields BGR channel order while matplotlib expects RGB, so convert
# for display only; `img` itself is left untouched for the following cells.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()
img.shape

In [None]:
# Segment the preview image and show the mask scaled down to 0/1 values.
mask = get_binary_mask(img)
plt.imshow(mask // 255, cmap='gray')
plt.show()
mask.shape

In [None]:
# Read the sample submission CSV; from here on `df` refers to this table.
df = pd.read_csv(
    '../input/hpa-single-cell-image-classification/sample_submission.csv'
)
df.head()

In [None]:
def get_encode(Id):
    """Return the encoded segmentation mask of one test image as ASCII text.

    Parameters
    ----------
    Id : str
        Image identifier; the image is read from
        '../input/hpaimage512-data/TarName/test/<Id>.jpg'.

    Returns
    -------
    str
        Output of `encode_binary_mask` for the image's binary mask, decoded
        to `str` so that it is written to CSV as plain text rather than as
        a `b'...'` bytes repr.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    path = '../input/hpaimage512-data/TarName/test/{}.jpg'.format(Id)
    img = cv2.imread(path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError('could not read test image: {}'.format(path))

    # The mask holds 0/255 values; any non-zero pixel is foreground.
    mask = get_binary_mask(img) > 0

    encoded = encode_binary_mask(mask)
    # base64.b64encode produces bytes; convert to text for the DataFrame/CSV.
    if isinstance(encoded, bytes):
        encoded = encoded.decode('ascii')
    return encoded

In [None]:
# Encode the mask of every image listed in `df` and preview the result.
df['encode'] = df['ID'].apply(get_encode)
df.head(n=10)

In [None]:
# Write the table, including the new 'encode' column, without the index.
df.to_csv(path_or_buf='encoded_csv.csv', index=False)