In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import cv2
import json
import tifffile as tiff
import matplotlib.pyplot as plt
import pickle
import rasterio
from rasterio.windows import Window
import tensorflow as tf
# tf.debugging.set_log_device_placement(True)
import tensorflow_addons as tfa

In [3]:
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# print(gpu_devices)
if gpu_devices:
    for gpu_device in gpu_devices:
        print('device available:', gpu_device)

device available: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [4]:
MASKS = '../input/hubmap-kidney-segmentation/train.csv'
DATA = '../input/hubmap-kidney-segmentation/train/'

In [5]:
masks_df = pd.read_csv(MASKS).set_index('id')
# mask_df
masks_df.head()

Unnamed: 0_level_0,encoding
id,Unnamed: 1_level_1
2f6ecfcdf,296084587 4 296115835 6 296115859 14 296147109...
8242609fa,96909968 56 96941265 60 96972563 64 97003861 6...
aaa6a05cc,30989109 59 31007591 64 31026074 68 31044556 7...
cb2d976f4,78144363 5 78179297 15 78214231 25 78249165 35...
b9a3865fc,61271840 4 61303134 13 61334428 22 61365722 30...


In [6]:
sz = 256   #the size of tiles
reduce = 4 #reduce the original images by 4 times 
MASKS = '../input/hubmap-kidney-segmentation/train.csv'
DATA = '../input/hubmap-kidney-segmentation/train/'
OUT_TRAIN = 'train.zip'
OUT_MASKS = 'masks.zip'

In [7]:
#functions to convert encoding to mask and mask to encoding
def enc2mask(encs, shape):
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for m,enc in enumerate(encs):
        if isinstance(enc,np.float) and np.isnan(enc): continue
        s = enc.split()
        for i in range(len(s)//2):
            start = int(s[2*i]) - 1
            length = int(s[2*i+1])
            img[start:start+length] = 1 + m
    return img.reshape(shape).T

def mask2enc(mask, n=1):
    pixels = mask.T.flatten()
    encs = []
    for i in range(1,n+1):
        p = (pixels == i).astype(np.int8)
        if p.sum() == 0: encs.append(np.nan)
        else:
            p = np.concatenate([[0], p, [0]])
            runs = np.where(p[1:] != p[:-1])[0] + 1
            runs[1::2] -= runs[::2]
            encs.append(' '.join(str(x) for x in runs))
    return encs

def rle_encode_less_memory(img):
    #the image should be transposed
    pixels = img.T.flatten()
    
    # This simplified method requires first and last pixel to be zero
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] -= runs[::2]
    
    return ' '.join(str(x) for x in runs)

In [8]:
def mask2rle(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
 
def rle2mask(mask_rle, shape=(1600,256)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (width,height) of array to return 
    Returns numpy array, 1 - mask, 0 - background

    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T


In [9]:
s_th = 40  #saturation blancking threshold
p_th = 1000*(sz//256)**2 #threshold for the minimum number of pixels

# class HuBMAPDataset(Dataset):
class HuBMAPDataset:
    def __init__(self, idx, sz=sz, reduce=reduce, encs=None):
        self.data = rasterio.open(os.path.join(DATA,idx+'.tiff'),num_threads='all_cpus')
        # some images have issues with their format 
        # and must be saved correctly before reading with rasterio
        if self.data.count != 3:
            subdatasets = self.data.subdatasets
            self.layers = []
            if len(subdatasets) > 0:
                for i, subdataset in enumerate(subdatasets, 0):
                    self.layers.append(rasterio.open(subdataset))
        self.shape = self.data.shape
        self.reduce = reduce
        self.sz = reduce*sz
        self.pad0 = (self.sz - self.shape[0]%self.sz)%self.sz
        self.pad1 = (self.sz - self.shape[1]%self.sz)%self.sz
        self.n0max = (self.shape[0] + self.pad0)//self.sz # no of tiles along image rows
        self.n1max = (self.shape[1] + self.pad1)//self.sz # no of tiles along image cols
        self.mask = enc2mask(encs,(self.shape[1],self.shape[0])) if encs is not None else None
                
    def __len__(self):
        return self.n0max*self.n1max # total no of tiles
    
    def __getitem__(self, idx):
        # the code below may be a little bit difficult to understand,
        # but the thing it does is mapping the original image to
        # tiles created with adding padding (like in the previous version of the kernel)
        # then the tiles are loaded with rasterio
        # n0,n1 - are the x and y index of the tile (idx = n0*self.n1max + n1)
        n0,n1 = idx//self.n1max, idx%self.n1max
        # x0,y0 - are the coordinates of the lower left corner of the tile in the image
        # negative numbers correspond to padding (which must not be loaded)
        x0,y0 = -self.pad0//2 + n0*self.sz, -self.pad1//2 + n1*self.sz

        # make sure that the region to read is within the image
        p00,p01 = max(0,x0), min(x0+self.sz,self.shape[0])
        p10,p11 = max(0,y0), min(y0+self.sz,self.shape[1])
        img = np.zeros((self.sz,self.sz,3),np.uint8)
        mask = np.zeros((self.sz,self.sz),np.uint8)
        # mapping the loaded region to the tile
        if self.data.count == 3:
            img[(p00-x0):(p01-x0),(p10-y0):(p11-y0)] = np.moveaxis(self.data.read([1,2,3],
                window=Window.from_slices((p00,p01),(p10,p11))), 0, -1)
        else:
            for i,layer in enumerate(self.layers):
                img[(p00-x0):(p01-x0),(p10-y0):(p11-y0),i] =\
                  layer.read(1,window=Window.from_slices((p00,p01),(p10,p11)))
        if self.mask is not None: mask[(p00-x0):(p01-x0),(p10-y0):(p11-y0)] = self.mask[p00:p01,p10:p11]
        
        if self.reduce != 1:
            img = cv2.resize(img,(self.sz//reduce,self.sz//reduce),
                             interpolation = cv2.INTER_AREA)
            mask = cv2.resize(mask,(self.sz//reduce,self.sz//reduce),
                             interpolation = cv2.INTER_NEAREST)
        #check for empty imges
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h,s,v = cv2.split(hsv)
        #return -1 for empty images
        return img, mask, (-1 if (s>s_th).sum() <= p_th or img.sum() <= p_th else idx)

# Create the zipped files of 256x256 images

In [10]:
import zipfile
from tqdm.notebook import tqdm

x_tot,x2_tot = [],[]
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out,\
 zipfile.ZipFile(OUT_MASKS, 'w') as mask_out:
    for index, encs in tqdm(masks_df.iterrows(),total=len(masks_df)):
        #image+mask dataset
        ds = HuBMAPDataset(index,encs=encs)
        for i in range(len(ds)):
            im,m,idx = ds[i]
            if idx < 0: continue
                
            x_tot.append((im/255.0).reshape(-1,3).mean(0))
            x2_tot.append(((im/255.0)**2).reshape(-1,3).mean(0))
            
            #write data   
            im = cv2.imencode('.png',cv2.cvtColor(im, cv2.COLOR_RGB2BGR))[1] #Why?
            img_out.writestr(f'{index}_{idx:04d}.png', im)
            m = cv2.imencode('.png',m)[1]
            mask_out.writestr(f'{index}_{idx:04d}.png', m)
        
#image stats
img_avr =  np.array(x_tot).mean(0)
img_std =  np.sqrt(np.array(x2_tot).mean(0) - img_avr**2)
print('mean:',img_avr, ', std:', img_std)

  0%|          | 0/15 [00:00<?, ?it/s]

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


mean: [0.63701495 0.4709702  0.6817423 ] , std: [0.15978882 0.2245109  0.14173926]


In [11]:
mean = [0.63701495, 0.4709702,  0.6817423 ] 
std = [0.15978882, 0.2245109,  0.14173926]

# Setup for creating TFRecords

In [12]:
def _bytes_feature(value): # Could we have used a float feature?
  '''Returns a bytes_list from a string / byte.'''
  if isinstance(value, type(tf.constant(0))):
    value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def serialize_example(image, mask):
  """
  Creates a tf.train.Example message ready to be written to a file.
  """
  # Create a dictionary mapping the feature name to the tf.train.Example-compatible
  # data type.
  feature = {
      'image': _bytes_feature(image),
      'mask': _bytes_feature(mask),
  }

  # Create a Features message using tf.train.Example.

  example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
  return example_proto.SerializeToString()

# Create loop for saving tfrecords

In [13]:
from tqdm.notebook import tqdm
import gc

if not os.path.exists('tfrecs'):
    os.makedirs('tfrecs')

for index, encs in tqdm(masks_df.iterrows(),total=len(masks_df)):
    ds = HuBMAPDataset(index, encs=encs)
    filename = 'tfrecs/'+index+'.tfrec'
    cnt = 0
    with tf.io.TFRecordWriter(filename) as writer:
        
        for i in range(len(ds)):
            im, m, flag = ds[i]
            
            if flag<0:continue
            
            im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            example = serialize_example(im.tobytes(), m.tobytes())
            writer.write(example)
            cnt+=1
        
    os.rename(filename, 'tfrecs/'+ index + '-'+str(cnt) +'.tfrec')
    gc.collect()

  0%|          | 0/15 [00:00<?, ?it/s]

In [14]:
cpus = tf.config.list_physical_devices('CPU')
gpus = tf.config.list_physical_devices('GPU')
print(cpus, gpus)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')] [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Trying out sample code for learning and understanding purposes

In [15]:
import tensorflow as tf
tf.debugging.set_log_device_placement(True)
with tf.device('/GPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)
print(c)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [16]:
with tf.device('/GPU:0'):
    aa = tf.data.Dataset.from_tensor_slices(c)
# ASK: This gives:-
# Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
# Even though I have enabled GPU.

In [17]:
# tf.debugging.set_log_device_placement(True)

# Create some tensors
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print(c)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


# ASK: (OR LOOK AT LATER)
'By default, TensorFlow maps nearly all of the GPU memory of all GPUs (subject to CUDA_VISIBLE_DEVICES) visible to the process meaning' (from tensorflow doc page).
https://www.tensorflow.org/api_docs/python/tf/device

# Load dataset from TFRecords

In [18]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 64
val_len = int(len(masks_df) * 0.2)
train_len = len(masks_df) - val_len

In [19]:
!pip install -q -U albumentations

In [20]:
import random

In [21]:
from functools import partial
from albumentations import (
    Compose, RandomBrightness, JpegCompression, HueSaturationValue, RandomContrast, HorizontalFlip,
    Rotate
)
transforms = Compose([
            Rotate(limit=40),
            RandomBrightness(limit=0.1),
            HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            RandomContrast(limit=0.2, p=0.5),
            HorizontalFlip(),
        ])



In [22]:
def _parse_image_function(example_proto):
    
    image_feature_description = {'image':tf.io.FixedLenFeature([], tf.string, default_value=''),
                                'mask':tf.io.FixedLenFeature([], tf.string, default_value='')}
    
    single_sample = tf.io.parse_single_example(example_proto, image_feature_description)
#     import pdb;pdb.set_trace()
    image = tf.reshape(tf.io.decode_raw(single_sample['image'], out_type=np.dtype('uint8')),(sz, sz, 3))
    image = tf.cast(image, 'float32')/255.0 # is casting to float32 the correct way to debug?
    mask = tf.reshape(tf.io.decode_raw(single_sample['mask'], out_type='bool'),(sz, sz, 1)) # can we do (sz,sz)?
#     mask = tf.cast(mask, 'uint8')
    mask = tf.cast(mask, 'float32')
    
    return image, mask

def augment(image, mask):
    data = {"image":image,"mask":mask}
    random.seed(11)
    transformed = transforms(**data)
    transformed_image = transformed["image"]
    transformed_mask = transformed["mask"]
    return transformed_image, transformed_mask

def data_augment(image, mask):
    p_spatial = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    
    # Flips
    if p_spatial >= .2:
        image = tf.image.flip_left_right(image)
        mask = tf.image.flip_left_right(mask)
        image = tf.image.flip_up_down(image)
        mask = tf.image.flip_up_down(mask)
        
        
    # Rotates
    if p_rotate > .75:
        image = tf.image.rot90(image, k=3) # rotate 270º
        mask = tf.image.rot90(mask, k=3) # rotate 270º
    elif p_rotate > .5:
        image = tf.image.rot90(image, k=2) # rotate 180º
        mask = tf.image.rot90(mask, k=2) # rotate 180º
    elif p_rotate > .25:
        image = tf.image.rot90(image, k=1) # rotate 90º
        mask = tf.image.rot90(mask, k=1) # rotate 90º

    return image, mask

def process_aug(image, mask):
    aug_img, aug_mask = tf.numpy_function(func=augment, inp=[image, mask], Tout=[tf.float32, tf.float32])
    return aug_img, aug_mask

def load_dataset(filenames, flag):
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(lambda ex: _parse_image_function(ex))
    if flag=="TRAIN":
        dataset = dataset.map(data_augment)
#         dataset = dataset.map(process_aug)
    return dataset

def get_dataset(filename, flag):
    dataset = load_dataset(filename, flag)
    dataset = dataset.shuffle(2048, reshuffle_each_iteration=True)
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset

# Training

In [23]:
FILENAMES = tf.io.gfile.glob("./tfrecs/*.tfrec")
TRAIN_FILES = FILENAMES[:train_len]
VALID_FILES = FILENAMES[train_len:]

In [24]:
TRAIN_FILES

['./tfrecs/b9a3865fc-772.tfrec',
 './tfrecs/54f2eec69-299.tfrec',
 './tfrecs/2f6ecfcdf-324.tfrec',
 './tfrecs/b2dc8411c-213.tfrec',
 './tfrecs/afa5e8098-1125.tfrec',
 './tfrecs/c68fe75ea-1136.tfrec',
 './tfrecs/e79de561c-308.tfrec',
 './tfrecs/aaa6a05cc-134.tfrec',
 './tfrecs/095bf7a1f-656.tfrec',
 './tfrecs/8242609fa-770.tfrec',
 './tfrecs/0486052bb-355.tfrec',
 './tfrecs/26dc41664-794.tfrec']

In [25]:
with tf.device('/GPU:0'):
    train_dataset = get_dataset(TRAIN_FILES, "TRAIN")
    valid_dataset = get_dataset(VALID_FILES, "VALID")
# statements below: why is everything executed on cpu?

In [26]:
for imgs, masks in train_dataset.take(2):
    print(imgs.shape, masks.shape)

(64, 256, 256, 3) (64, 256, 256, 1)
(64, 256, 256, 3) (64, 256, 256, 1)


# Make the model

In [27]:
# !pip install -U segmentation-models
!pip install --user --upgrade keras
!pip3 install --user --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.5.0-cp37-cp37m-manylinux2010_x86_64.whl (454.3 MB)
[K     |████████████████████████████████| 454.3 MB 14 kB/s 
Collecting keras-nightly~=2.5.0.dev
  Downloading keras_nightly-2.5.0.dev2021032900-py2.py3-none-any.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 30.6 MB/s 
Collecting tensorboard~=2.5
  Downloading tensorboard-2.5.0-py3-none-any.whl (6.0 MB)
[K     |████████████████████████████████| 6.0 MB 15.3 MB/s 
Collecting grpcio~=1.34.0
  Downloading grpcio-1.34.1-cp37-cp37m-manylinux2014_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 42.6 MB/s 
Collecting h5py~=3.1.0
  Downloading h5py-3.1.0-cp37-cp37m-manylinux1_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 34.5 MB/s 
[?25hCollecting gast==0.4.0
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting tensorflow-estimator<2.6.0,>=2.5.0rc0
  Downloading tensorflow_estimator-2.5.0-py2.py3-none-any.w

In [28]:
!pip install -U segmentation-models

Collecting segmentation-models
  Downloading segmentation_models-1.0.1-py3-none-any.whl (33 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 1.3 MB/s 
[?25hCollecting image-classifiers==1.0.0
  Downloading image_classifiers-1.0.0-py3-none-any.whl (19 kB)
Collecting efficientnet==1.0.0
  Downloading efficientnet-1.0.0-py3-none-any.whl (17 kB)
Installing collected packages: keras-applications, image-classifiers, efficientnet, segmentation-models
Successfully installed efficientnet-1.0.0 image-classifiers-1.0.0 keras-applications-1.0.8 segmentation-models-1.0.1


In [29]:
%env SM_FRAMEWORK=tf.keras

env: SM_FRAMEWORK=tf.keras


In [30]:
import segmentation_models as sm

Segmentation Models: using `tf.keras` framework.


In [31]:
tf.keras.backend.set_image_data_format('channels_last')

In [32]:
BACKBONE = 'resnet34'
# preprocess_input = sm.get_preprocessing(BACKBONE)
# x_train = preprocess_input(train_dataset) #what preporcessing does get_preprocessing do?
# x_valid = preprocess_input(valid_dataset)

In [33]:
sm.losses.bce_jaccard_loss

<segmentation_models.base.objects.SumOfLosses at 0x7efcdbefae50>

In [34]:
model = sm.Linknet(BACKBONE, encoder_weights='imagenet')
# model.summary()

Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/resnet34_imagenet_1000_no_top.h5


In [35]:
# model = sm.Unet(BACKBONE, encoder_weights='imagenet')
# model = sm.Linknet(BACKBONE, encoder_weights='imagenet')
model.compile(
    'Adam',
    loss=sm.losses.bce_jaccard_loss,
    metrics=[sm.metrics.iou_score]
)

In [36]:
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [37]:
history = model.fit(
   train_dataset,
   epochs=200,
   validation_data=valid_dataset
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# TESTING

In [38]:
# STARTER CODE GOT FROM SOMEWHERE ELSE. STILL WORKING ON IT.
names,preds = [],[]
for idx,row in tqdm(df_sample.iterrows(),total=len(df_sample)):
    idx = row['id']
    ds = HuBMAPDataset(idx)
    #rasterio cannot be used with multiple workers
    dl = DataLoader(ds,bs,num_workers=0,shuffle=False,pin_memory=True)
    mp = Model_pred(models,dl)
    #generate masks
    mask = torch.zeros(len(ds),ds.sz,ds.sz,dtype=torch.int8)
    for p,i in iter(mp): mask[i.item()] = p.squeeze(-1) > TH
    
    #reshape tiled masks into a single mask and crop padding
    mask = mask.view(ds.n0max,ds.n1max,ds.sz,ds.sz).\
        permute(0,2,1,3).reshape(ds.n0max*ds.sz,ds.n1max*ds.sz)
    mask = mask[ds.pad0//2:-(ds.pad0-ds.pad0//2) if ds.pad0 > 0 else ds.n0max*ds.sz,
        ds.pad1//2:-(ds.pad1-ds.pad1//2) if ds.pad1 > 0 else ds.n1max*ds.sz]
    
    #convert to rle
    #https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
    rle = rle_encode_less_memory(mask.numpy())
    names.append(idx)
    preds.append(rle)
    del mask, ds, dl
    gc.collect()

NameError: name 'df_sample' is not defined