In [None]:
!pip install "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
!pip install "../input/hpapytorchzoozip/pytorch_zoo-master"
!pip install "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"
# Weight files handed to cellsegmentator.CellSegmentator further down:
# NUC_MODEL is passed as the first (nuclei) argument, CELL_MODEL as the second.
NUC_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth'
CELL_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth'

In [None]:
import base64
import glob
import os
import re
import typing as t
import zlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from pycocotools import _mask as coco_mask
from tqdm import tqdm
from tqdm import tqdm_notebook

import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

# Build the Segmentator
# NUC_MODEL / CELL_MODEL are the weight paths defined in the install cell.
# This module-level instance is the one batch_generate() and
# batch_generate_test() hand to image_parse().
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device='cuda',
    padding=True,
    multi_channel_model=True
)

In [None]:
def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  The mask is RLE-encoded with the COCO mask API, zlib-compressed and then
  base64-encoded.

  Args:
    mask: boolean array that is 2-D once singleton axes are squeezed out.

  Returns:
    The base64 text (decoded as UTF-8) of the compressed RLE counts.

  Raises:
    ValueError: if `mask` is not of boolean dtype, or is not 2-D after
      squeezing.
  """

  # check input mask --
  # `np.bool` was only an alias of the builtin `bool` and was removed in
  # NumPy 1.24; `np.bool_` is the real boolean scalar type.
  if mask.dtype != np.bool_:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: `%` applied to a bare tuple treats every
    # dimension as a separate argument and raises TypeError instead of the
    # intended ValueError.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str.decode('utf-8')

In [None]:
def segmentate(segmentator,
               image_id='000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0',
               work='train'):
    '''
    Segment one image into per-cell binary masks.

    Input:
    segmentator: object exposing pred_cells() and pred_nuclei();
    image_id <str>: the id of the image;
    work <str>: ['train'|'test'], sub-directory the image is read from;
    _____________________________________________________________________
    Output:
    cell_mask_list <numpy array>: [n_mask, x, y], one 0/1 mask per cell;
    encode_mask_list <numpy array of str>: [n_mask], each mask encoded by
        encode_binary_mask(), in the same order as cell_mask_list.
    When no cell is found both arrays have length 0.
    _____________________________________________________________________
    Raises:
    FileNotFoundError: if the red channel image cannot be read.
    '''
    base = f'/kaggle/input/hpa-single-cell-image-classification/{work}/{image_id}'
    mt = f'{base}_red.png'
    er = f'{base}_yellow.png'
    nu = f'{base}_blue.png'

    images = [[mt], [er], [nu]]

    # The red channel is only loaded here to learn the image height.
    reference = cv2.imread(mt)
    if reference is None:
        raise FileNotFoundError(f'cannot read image: {mt}')
    img_h = reference.shape[0]

    cell_segmentations = segmentator.pred_cells(images)
    nuc_segmentations = segmentator.pred_nuclei(images[2])
    _, cell_mask = label_cell(nuc_segmentations[0], cell_segmentations[0])

    # Every non-zero value in cell_mask is one cell. Sorting makes the output
    # order deterministic; subtracting {0} tolerates a mask with no background.
    cell_labels = sorted(set(np.unique(cell_mask).tolist()) - {0})

    # Images taller than 2048 px are capped at 15 cells (same limit as before).
    max_cells_large_image = 15
    if img_h > 2048:
        cell_labels = cell_labels[:max_cells_large_image]

    if not cell_labels:
        return np.zeros((0,) + cell_mask.shape, dtype=int), np.array([], dtype=str)

    # Collect in plain lists and stack once; this no longer depends on label 1
    # being present (or being visited first) to initialise the accumulators.
    masks, encoded = [], []
    for number in cell_labels:
        isolated_cell = np.where(cell_mask == number, 1, 0)
        encoded.append(encode_binary_mask(isolated_cell.astype(bool)))
        masks.append(isolated_cell)

    return np.stack(masks), np.array(encoded)

In [None]:
def HPA_image(image_id='5c27f04c-bb99-11e8-b2b9-ac1f6b6435d0', work='train'):
    '''
    Load one image of shape (Height, Width, 3) from the kaggle dataset.

    The three planes are the `_blue`, `_green` and `_red` files, stacked in
    that order along the last axis. The `_yellow` file is not part of the
    output and is therefore not read.

    Each file is read with cv2.IMREAD_GRAYSCALE, which yields 8-bit data, so
    the result is uint8 and needs no rescaling.

    Raises:
    FileNotFoundError: if one of the three channel files cannot be read.
    '''
    base = f'/kaggle/input/hpa-single-cell-image-classification/{work}/{image_id}'
    nu = f'{base}_blue.png'
    pr = f'{base}_green.png'
    mt = f'{base}_red.png'

    channels = []
    for path in (nu, pr, mt):
        channel = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if channel is None:
            raise FileNotFoundError(f'cannot read image: {path}')
        channels.append(channel)

    return np.stack(channels, axis=-1)

def make_bound(mask):
    '''
    Bounding box of a 2-D mask.

    Returns ((x_l, x_u), (y_l, y_u)) where x_l / x_u are the first / last
    row index whose sum is positive and y_l / y_u the first / last such
    column index. Both ends are inclusive.

    For a mask with no positive row (or column) the result spans the whole
    axis: (0, length - 1).
    '''
    def _extent(occupied):
        # argmax on a boolean vector returns the position of the first True
        # (or 0 when there is none); the reversed vector gives the last one.
        first = np.argmax(occupied)
        last = len(occupied) - 1 - np.argmax(occupied[::-1])
        return first, last

    rows_hit = np.sum(mask, 1) > 0
    cols_hit = np.sum(mask, 0) > 0
    return _extent(rows_hit), _extent(cols_hit)

def image_cut(image, mask):
    '''
    Zero out everything outside `mask` and crop `image` to the mask's
    bounding box.

    image: array of shape (H, W, C);
    mask:  array of shape (H, W), multiplied into every channel.
    Returns the cropped (h, w, C) array.
    '''
    (x_u, x_d), (y_u, y_d) = make_bound(mask)
    image = image * np.expand_dims(mask, -1)
    # make_bound() returns inclusive indices, so the slice end needs +1.
    # Without it the last row/column of the cell is dropped and a mask that
    # is one pixel thick produces an empty crop.
    return image[x_u:x_d + 1, y_u:y_d + 1, :]

def image_seize(image, mask, dsize=(64, 64)):
    '''
    Crop `image` to `mask` with image_cut(), cast the crop to uint8 and
    resize it to `dsize` with cv2.resize().
    '''
    cropped = image_cut(image, mask)
    cropped_u8 = cropped.astype('uint8')
    return cv2.resize(cropped_u8, dsize=dsize)

def image_parse(segmentator,
                image_id='000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0',
                work='train',
                dsize=(64, 64)):
    '''
    Segment one image and return one resized crop per detected cell.

    Input:
    segmentator: forwarded to segmentate();
    image_id <str>, work <str>: which image to load;
    dsize <tuple>: target size handed to image_seize() / cv2.resize().
    _____________________________________________________________________
    Output:
    cell_batch <numpy array>: [n_cell, dsize[1], dsize[0], 3] uint8 crops;
    encode_mask_list: encoded masks as returned by segmentate().
    When no cell was segmented, cell_batch has n_cell == 0 instead of the
    function failing inside np.concatenate.
    '''
    cell_mask_list, encode_mask_list = segmentate(segmentator,
                                                  image_id=image_id,
                                                  work=work)

    if len(cell_mask_list) == 0:
        # cv2.resize takes dsize as (width, height) and returns (height, width, C).
        empty_batch = np.zeros((0, dsize[1], dsize[0], 3), dtype='uint8')
        return empty_batch, encode_mask_list

    image = HPA_image(image_id, work)

    cell_batch = np.stack(
        [image_seize(image, mask, dsize) for mask in cell_mask_list], 0)

    return cell_batch, encode_mask_list

In [None]:
def batch_generate(ran=None, dsize=(64, 64)):
    '''
    Build cell crops and labels from the single-label rows of train.csv.

    ran:   None to use every single-label row, or a (start, stop) pair. The
           slice starts at start modulo the number of single-label rows and
           spans stop - start rows (it is cut short at the end of the table).
    dsize: crop size forwarded to image_parse().

    Returns (cells, labels): all crops concatenated along axis 0, and one
    integer label per crop (the label of the image the crop came from).
    Uses the module-level `segmentator`.
    '''
    train_df = pd.read_csv('/kaggle/input/hpa-single-cell-image-classification/train.csv')
    is_single_label = train_df.Label.map(lambda seq: len(seq.split('|')) == 1)
    single_label_df = train_df[is_single_label]

    if ran is None:
        selected = single_label_df
    else:
        n_image = len(single_label_df)
        first = ran[0] % n_image
        selected = single_label_df.iloc[first:first + (ran[1] - ran[0])]

    cell_batches = []
    label_batches = []
    for image_id, label in zip(selected['ID'], selected['Label']):
        cell_batch, _ = image_parse(segmentator,
                                    image_id=image_id,
                                    work='train',
                                    dsize=dsize)
        cell_batches.append(cell_batch)
        # every crop inherits the label of its source image
        label_batches.append(np.array([int(label)] * cell_batch.shape[0]))

    cells = np.concatenate(cell_batches, 0)
    labels = np.concatenate(label_batches, 0)
    return cells, labels

def minibatch_generate(cells, labels, batch_size=16):
    '''
    Shuffle `cells` / `labels` together and yield them in mini-batches.

    cells:      array indexed by sample along axis 0;
    labels:     array with one entry per sample;
    batch_size: samples per batch (the last batch may be smaller).

    Yields (cells_mini, labels_mini) pairs. Nothing is yielded for empty
    input (previously a single empty batch was produced).

    Raises ValueError if batch_size is smaller than 1 (previously this
    looped forever).
    '''
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    n_samples = cells.shape[0]
    # One shared permutation keeps every cell aligned with its label.
    ids = np.random.permutation(np.arange(n_samples))
    cells, labels = cells[ids], labels[ids]

    for start in range(0, n_samples, batch_size):
        stop = start + batch_size
        yield cells[start:stop], labels[start:stop]
    
def unified_generate(ran=None, dsize=(64, 64), batch_size=16):
    '''
    Build the crops for `ran` with batch_generate() and yield them as
    shuffled mini-batches from minibatch_generate().

    ran, dsize: forwarded to batch_generate();
    batch_size: forwarded to minibatch_generate() (it used to be ignored,
                so every caller silently got the default of 16).
    '''
    cells, labels = batch_generate(ran=ran, dsize=dsize)
    yield from minibatch_generate(cells, labels, batch_size=batch_size)

In [None]:
def batch_generate_test(ran=None, dsize=(64, 64)):
    '''
    Iterate over the test images and yield the data needed for one
    submission row each.

    ran:   None for every test image, or a (start, stop) pair. As in
           batch_generate(), the slice starts at start modulo the number of
           images and spans stop - start images, so ran=(0, n_images)
           selects everything (it used to select nothing).
    dsize: crop size forwarded to image_parse().

    Yields (cell_batch, encode_mask_list, image_id, width, height).
    Image ids are visited in sorted order. The process working directory is
    no longer changed. Uses the module-level `segmentator`.
    '''
    test_dir = "/kaggle/input/hpa-single-cell-image-classification/test/"
    suffix = "_red.png"

    # One id per red-channel file; glob with an absolute pattern instead of
    # os.chdir() so relative paths used elsewhere keep working.
    filenames = sorted(glob.glob(os.path.join(test_dir, "*" + suffix)))
    image_ids = [os.path.basename(filename)[:-len(suffix)] for filename in filenames]
    n_image = len(image_ids)

    if ran is not None and n_image > 0:
        first = ran[0] % n_image
        image_ids = image_ids[first:first + (ran[1] - ran[0])]

    for image_id in image_ids:
        cell_batch, encode_mask_list = image_parse(segmentator,
                                                   image_id=image_id,
                                                   work='test',
                                                   dsize=dsize)

        # Loaded again only to report the original image size.
        image = HPA_image(image_id=image_id, work='test')

        width, height = image.shape[1], image.shape[0]

        yield cell_batch, encode_mask_list, image_id, width, height
        
def create_predstr(label, confidence, encode_mask_list):
    '''
    Join (label, confidence, encoded mask) triples into one space-separated
    string: "label conf mask label conf mask ...".
    Iteration stops at the shortest of the three inputs.
    '''
    tokens = []
    for triple in zip(label, confidence, encode_mask_list):
        tokens.append(' '.join(map(str, triple)))
    return ' '.join(tokens)


def create_PredictionString(pred, encode_mask_list, n_classes=19):
    '''
    Build the space-separated prediction string for one image.

    pred:             flat sequence of class scores, n_classes consecutive
                      values per cell (cell-major order);
    encode_mask_list: one encoded mask per cell (array or list);
    n_classes:        number of scores per cell (default 19).

    For every cell and every class index c the triple "c score mask" is
    emitted, so each mask is repeated n_classes times. Iteration stops at
    the shorter of `pred` and the expanded label/mask lists.
    '''
    n_cells = len(encode_mask_list)
    # 0..n_classes-1 repeated once per cell, aligned with the flattened scores.
    label = np.tile(np.arange(n_classes), n_cells)
    encode_mask_list = np.repeat(encode_mask_list, n_classes)

    strs = [' '.join([str(l), str(p), str(eml)])
            for l, p, eml in zip(label, pred, encode_mask_list)]
    return ' '.join(strs)

In [None]:
class CNN2d(nn.Module):
    """Three-stage convolutional classifier (19 outputs) with sample buffers.

    Network: three 5x5 convolutions (3->16->32->64 channels), each followed by
    a 2x2 pooling with stride 2, then one linear layer with 65536 inputs. That
    input width equals 64 channels x 32 x 32, i.e. it matches 256x256 crops.

    Besides the network the instance owns its loss, its Adam optimizer and two
    bounded buffers of already-seen samples (`train_buffer`, `valid_buffer`)
    that are replayed during training / validation.

    NOTE(review): `train` below shadows `nn.Module.train(mode)`. Since
    `nn.Module.eval()` is implemented as `self.train(False)`, calling
    `.eval()` on an instance runs this training loop for zero epochs and does
    NOT switch the module to evaluation mode.
    """

    def __init__(self, device):
        """Create layers, loss, optimizer and empty buffers.

        device: device that input batches are moved to (the module itself is
                moved by the caller, e.g. with `.to(device)`).
        """
        super(CNN2d, self).__init__()
        self.conv0 = nn.Conv2d(3, 16, (5, 5), padding=2)
        self.pool0 = nn.AvgPool2d((2, 2), 2)
        self.conv1 = nn.Conv2d(16, 32, (5, 5), padding=2)
        self.pool1 = nn.AvgPool2d((2, 2), 2)
        self.conv2 = nn.Conv2d(32, 64, (5, 5), padding=2)
        self.pool2 = nn.MaxPool2d((2, 2), 2)
        self.fc = nn.Sequential(
            nn.Linear(65536, 19),
        )

        self.criterion = nn.CrossEntropyLoss()

        self.optimizer = optim.Adam(self.parameters(), lr=1e-5)

        self.device = device

        # (cells, labels) tensor pairs, or None while nothing was stored yet
        self.train_buffer = None
        self.valid_buffer = None

        # maximum number of samples kept in each buffer
        self.buffer_train_max = 2048
        self.buffer_valid_max = 512

        # train() counts validations whose accuracy exceeds acc_threshold and
        # stops after dead_lim of them in a row
        self.acc_threshold = 0.33
        self.dead_lim = 5
        self.dead_cnt = self.dead_lim

    def forward(self, x):
        """Map a (N, 3, H, W) batch to (N, 19) class scores."""
        x = self.conv0(x)
        x = self.pool0(x)
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

    def loss(self, pred, y):
        """Cross-entropy between scores `pred` and integer labels `y`."""
        return self.criterion(pred, y)

    def fit(self, x, y):
        """Run one optimizer step on batch (x, y); return the loss as numpy."""
        self.zero_grad()
        pred = self(x)
        loss = self.loss(pred, y)
        loss.backward()
        self.optimizer.step()
        loss = loss.cpu().detach().numpy()
        return loss

    @staticmethod
    def _merge_into_buffer(buffer, cells_mini, labels_mini, max_size):
        """Append a batch to `buffer`, shuffle, and keep at most `max_size`."""
        if buffer is None:
            cells, labels = cells_mini, labels_mini
        else:
            cells = torch.cat([buffer[0], cells_mini], 0)
            labels = torch.cat([buffer[1], labels_mini], 0)

        # ONE permutation for both tensors. Shuffling them with two
        # independent permutations would detach every image from its label.
        order = torch.randperm(cells.shape[0], device=cells.device)
        cells, labels = cells[order], labels[order]

        return cells[:max_size], labels[:max_size]

    def _as_model_batch(self, cells_mini, labels_mini):
        """Convert numpy (N, H, W, C) crops / labels to device tensors (N, C, H, W)."""
        cells = torch.FloatTensor(cells_mini).to(self.device).permute(0, 3, 1, 2)
        labels = torch.LongTensor(labels_mini).to(self.device)
        return cells, labels

    def _sample_batches(self, ran_stride, dsize, batch_size):
        """Yield model-ready batches from `ran_stride` randomly chosen images."""
        idx = np.random.randint(int(1e8))
        generator = unified_generate(ran=(idx, idx + ran_stride),
                                     dsize=dsize, batch_size=batch_size)
        for cells_mini, labels_mini in generator:
            yield self._as_model_batch(cells_mini, labels_mini)

    def buffer_train_load(self, cells_mini, labels_mini):
        """Add a tensor batch to the training buffer (shuffled, size-capped)."""
        self.train_buffer = self._merge_into_buffer(
            self.train_buffer, cells_mini, labels_mini, self.buffer_train_max)

    def buffer_valid_load(self, cells_mini, labels_mini):
        """Add a tensor batch to the validation buffer (shuffled, size-capped)."""
        self.valid_buffer = self._merge_into_buffer(
            self.valid_buffer, cells_mini, labels_mini, self.buffer_valid_max)

    def buffer_generator(self, src='train', batch_size=16):
        """Yield (cells, labels) slices of the chosen buffer.

        src: 'train' or 'valid'; anything else raises ValueError.
        Nothing is yielded while the chosen buffer is still empty.
        """
        if src == 'train':
            buffer = self.train_buffer
        elif src == 'valid':
            buffer = self.valid_buffer
        else:
            raise ValueError(f"src must be 'train' or 'valid', got {src!r}")

        if buffer is None:
            return

        for start in range(0, buffer[0].shape[0], batch_size):
            stop = start + batch_size
            yield buffer[0][start:stop], buffer[1][start:stop]

    def fill_buffer(self, dsize=(256, 256), batch_size=16):
        """Keep sampling random images until both buffers are full."""
        print('filling train buffer')
        while self.train_buffer is None or self.train_buffer[0].shape[0] < self.buffer_train_max:
            for cells_mini, labels_mini in self._sample_batches(1, dsize, batch_size):
                self.buffer_train_load(cells_mini, labels_mini)

        print('filling valid buffer')
        while self.valid_buffer is None or self.valid_buffer[0].shape[0] < self.buffer_valid_max:
            for cells_mini, labels_mini in self._sample_batches(1, dsize, batch_size):
                self.buffer_valid_load(cells_mini, labels_mini)

    def train(self, n_epoch=100, e_valid=4, ran_stride=1, dsize=(256, 256), batch_size=16):
        """Training loop.

        Each epoch fits on the crops of `ran_stride` freshly sampled images,
        stores them in the training buffer, then fits once more on the whole
        buffer. Every `e_valid` epochs valid() is run; training stops once
        `dead_lim` consecutive validations exceeded `acc_threshold`.
        One log line is printed per epoch.
        """
        for epoch in range(n_epoch):
            log = f'Epoch#{epoch+1};'
            train_losses = []

            for cells_mini, labels_mini in self._sample_batches(ran_stride, dsize, batch_size):
                train_losses.append(self.fit(cells_mini, labels_mini))
                self.buffer_train_load(cells_mini, labels_mini)

            # Replay the buffer. Its entries already are tensors on the right
            # device, so they must not be wrapped in torch.FloatTensor again
            # (that constructor rejects CUDA tensors).
            for cells_mini, labels_mini in self.buffer_generator(batch_size=batch_size, src='train'):
                train_losses.append(self.fit(cells_mini, labels_mini))

            if train_losses:
                train_loss = sum(train_losses)/len(train_losses)
            else:
                train_loss = float('nan')
            log += f'Train Loss#{train_loss};'

            stop = False
            if (epoch+1) % e_valid == 0:
                acc = self.valid(epoch=epoch, dsize=dsize, batch_size=batch_size)
                log += f'Accuracy#{acc};'
                if acc > self.acc_threshold:
                    self.dead_cnt -= 1
                else:
                    self.dead_cnt = self.dead_lim
                stop = self.dead_cnt == 0

            # print before leaving so the final epoch is logged as well
            print(log)
            if stop:
                break

    def valid(self, epoch, ran_stride=1, dsize=(256, 256), batch_size=16):
        """Add freshly sampled crops to the validation buffer and return the
        accuracy over the whole buffer. `epoch` is accepted but not used."""
        for cells_mini, labels_mini in self._sample_batches(ran_stride, dsize, batch_size):
            self.buffer_valid_load(cells_mini, labels_mini)

        n_correct, n_total = 0, 0
        with torch.no_grad():
            for cells_mini, labels_mini in self.buffer_generator(batch_size=batch_size, src='valid'):
                preds_mini = torch.argmax(self(cells_mini), -1)
                n_correct += int((preds_mini == labels_mini).sum().item())
                n_total += labels_mini.shape[0]

        # the epsilon avoids a division by zero on an empty buffer
        acc = n_correct/(n_total + 1e-12)
        return acc

    def pred(self, cell_batch):
        """Return softmax class probabilities, shape (N, 19)."""
        pred = self(cell_batch)
        pred = torch.softmax(pred, -1)
        return pred

    def test(self):
        """Predict every test image and return the submission DataFrame with
        columns ID, ImageWidth, ImageHeight and PredictionString.

        Images without any segmented cell get an empty PredictionString.
        """
        ID, ImageWidth, ImageHeight, PredictionString = [], [], [], []
        for cell_batch, encode_mask_list, image_id, width, height in batch_generate_test(ran=None, dsize=(256, 256)):
            if cell_batch.shape[0] == 0:
                predstr = ''
            else:
                # use the instance's device, not a notebook-level global
                cell_batch = torch.FloatTensor(cell_batch).to(self.device)
                cell_batch = cell_batch.permute(0, 3, 1, 2)
                with torch.no_grad():
                    pred = self.pred(cell_batch)
                pred = np.ravel(pred.cpu().detach().numpy())
                predstr = create_PredictionString(pred, encode_mask_list)
            ID.append(image_id)
            ImageWidth.append(width)
            ImageHeight.append(height)
            PredictionString.append(predstr)

        df = pd.DataFrame({
            'ID':ID,
            'ImageWidth':ImageWidth, 
            'ImageHeight':ImageHeight, 
            'PredictionString':PredictionString,
        })
        return df

In [None]:
class CNN(nn.Module):
    """Larger three-stage CNN classifier (19 outputs) trained from in-memory arrays.

    `dir_name` is accepted for compatibility but not used: the code that
    loaded image/label arrays from that directory is disabled. train()
    therefore expects the caller to attach `images_train`, `labels_train`,
    `images_valid` and `labels_valid` to the instance beforehand.

    NOTE(review): `train` and `eval` below shadow the `nn.Module` methods of
    the same names, so the usual `.train()` / `.eval()` mode switches are not
    available on this class.
    """

    def __init__(self, dir_name, device):
        super().__init__()

        self.build_model()

        self.criterion = nn.CrossEntropyLoss()

        self.optimizer = optim.Adam(self.parameters(), lr=4e-5)

        self.device = device

    def build_model(self):
        """Create the layers; subclasses override this to swap the network."""
        self.conv0 = nn.Conv2d(3, 64, (5, 5), padding=2)
        self.pool0 = nn.AvgPool2d((2, 2), 2)
        self.conv1 = nn.Conv2d(64, 256, (5, 5), padding=2)
        self.pool1 = nn.AvgPool2d((2, 2), 2)
        self.conv2 = nn.Conv2d(256, 512, (5, 5), padding=2)
        self.pool2 = nn.MaxPool2d((2, 2), 2)
        self.fc = nn.Sequential(
            nn.Linear(65536*8, 19),
        )

    def forward(self, x):
        """Map a (N, 3, H, W) batch to (N, 19) class scores."""
        x = self.conv0(x)
        x = self.pool0(x)
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        return x

    def _to_model_batch(self, image, label):
        """Convert (N, H, W, C) images / labels to tensors on self.device, (N, C, H, W)."""
        # self.device instead of a notebook-level `device` global, so the
        # class works no matter which cells were run before it.
        image = torch.FloatTensor(image).to(self.device).permute(0, 3, 1, 2)
        label = torch.LongTensor(label).to(self.device)
        return image, label

    def fit(self, image, label):
        """Run one optimizer step on the batch; return the loss as a float."""
        image, label = self._to_model_batch(image, label)
        self.zero_grad()
        pred = self(image)
        loss = self.criterion(pred, label)
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def eval(self, image, label):
        """Return the fraction of correctly classified samples in the batch."""
        image, label = self._to_model_batch(image, label)
        with torch.no_grad():
            pred = torch.argmax(self(image), -1)
        result = pred.eq(label).cpu().detach().numpy()
        acc = result.sum()/len(result)
        return acc

    def train(self, batch_size=16, n_epoch=100, e_epoch=4, threshold=1.0):
        """Train for `n_epoch` epochs on self.images_train / self.labels_train.

        Every `e_epoch` epochs the mean per-batch accuracy on
        self.images_valid / self.labels_valid is printed. `threshold` is
        accepted but not used.

        Raises AttributeError when the data arrays were not attached.
        """
        required = ('images_train', 'labels_train', 'images_valid', 'labels_valid')
        missing = [name for name in required if getattr(self, name, None) is None]
        if missing:
            raise AttributeError(
                'CNN.train() needs these attributes to be set first: ' + ', '.join(missing))

        for epoch in range(n_epoch):
            print(f'Epoch#{epoch+1}')
            losses = []
            for n in tqdm(range(0, self.images_train.shape[0], batch_size)):
                loss = self.fit(self.images_train[n:(n+batch_size)], self.labels_train[n:(n+batch_size)])
                losses.append(loss)
            loss_train = sum(losses)/len(losses)
            print(f'Train Loss#{loss_train}')

            if (epoch + 1) % e_epoch == 0:
                accs = []
                for n in tqdm(range(0, self.images_valid.shape[0], batch_size)):
                    acc = self.eval(self.images_valid[n:(n+batch_size)], self.labels_valid[n:(n+batch_size)])
                    accs.append(acc)
                acc_valid = sum(accs)/len(accs)
                print(f'Valid Accuracy#{acc_valid}')


class ResNet(CNN):
    """CNN variant whose network is a torchvision ResNet-18 with 19 outputs."""

    def __init__(self, dir_name, device):
        # CNN.__init__ already calls self.build_model() (which dispatches to
        # the override below) and then creates the criterion, the Adam
        # optimizer (lr=4e-5) and self.device. Repeating those steps here
        # built the pretrained network a second time for no benefit.
        super().__init__(dir_name, device)

    def build_model(self):
        """Create the ResNet-18 backbone and adapt its first and last layer."""
        # NOTE(review): `pretrained=True` is the older torchvision API; newer
        # releases prefer the `weights=` argument. Kept as-is for the
        # environment this notebook targets.
        self.model = torchvision.models.resnet18(pretrained=True)
        # Replace the stem convolution by a stride-1 version (new, untrained
        # weights for this layer).
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False)
        self.fc_features = self.model.fc.in_features
        self.OUT_CLASSES = 19
        # New classification head with one output per class.
        self.model.fc = nn.Linear(self.fc_features, self.OUT_CLASSES)

    def forward(self, x):
        """Delegate to the wrapped ResNet-18."""
        return self.model(x)

In [None]:
class ResNet18(CNN2d):
    """CNN2d training/inference machinery driving a ResNet network.

    The buffers, train/valid/test loops and hyper-parameters come from
    CNN2d; forward() is redirected to an inner `ResNet` instance.

    NOTE(review): CNN2d.__init__ still creates its own conv/fc layers; they
    stay registered on this module (and in the optimizer) although forward()
    never uses them.
    """

    def __init__(self, device, model_path=None):
        """
        device:     device for the inner model and for input batches;
        model_path: optional file with a state dict for the inner ResNet.
        """
        super().__init__(device)
        self.model = ResNet('data', device).to(device)

        if model_path is not None:
            # map_location lets weights saved on a GPU be loaded on a
            # CPU-only host (and vice versa).
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            self.model.load_state_dict(torch.load(model_path, map_location=device))

        self.criterion = nn.CrossEntropyLoss()

        # Rebuilt so that the optimizer also covers the inner ResNet
        # parameters, with this class's own learning rate.
        self.optimizer = optim.Adam(self.parameters(), lr=1e-4)

        self.device = device

    def forward(self, x):
        """Delegate to the inner ResNet."""
        return self.model(x)

In [None]:
# torchvision is used by ResNet.build_model(); it has to be imported before
# the model is instantiated below.
import torchvision
# NOTE: `dsize` is not passed to anything in this cell; model.test() uses its
# own hard-coded (256, 256).
dsize = (256, 256)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build the classifier and load the saved weights into its inner ResNet.
model = ResNet18(device, model_path="/kaggle/input/resnet18-parameter/ResNet18_param.pkl").to(device)
# model.train(n_epoch=100, batch_size=64) # About 30s(CPU) for one train epoch (1min with valid)
# Run inference over the test images; returns the submission DataFrame.
df_submit = model.test() # About 40s(CPU) for one image (Total: About 5~7h)

In [None]:
# NOTE: with default notebook settings only the last expression of a cell is
# displayed, so the bare `df_submit` below shows nothing here.
df_submit # <--The file for submission
# Write the submission table without the index column.
df_submit.to_csv("/kaggle/working/submission.csv",index=False)

In [None]:
# Display the submission table as this cell's output.
df_submit

In [None]:
# Inspect the prediction string of the first row of the submission table.
df_submit['PredictionString'][0]