In [None]:
#Single Cells Model ResNet50 and EFF_B4

!pip install /kaggle/input/iterative-stratification/iterative-stratification-master/

import sys
package_path = '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master'
sys.path.append(package_path)

!ls ../input/efficientnet-pytorch/EfficientNet-PyTorch
%cd /kaggle/input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master
from efficientnet_pytorch import EfficientNet

import pandas as pd
import numpy as np
from fastai.vision.all import *
import pickle
import os
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
        os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/resnet50/resnet50.pth' '/root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth'
!cp '../input/efficientnet-pytorch-pretrained/adv-efficientnet-b4-44fb3a87.pth' '/root/.cache/torch/hub/checkpoints/'
!cp '../input/efficientnet-pytorch/efficientnet-b4-e116e8b3.pth' '/root/.cache/torch/hub/checkpoints/'

# Root of the pre-cropped cell-tile dataset; get_x() resolves tile files relative to `path`.
path = Path('../input/hpa-cell-tiles-sample-balanced-dataset')

# One row per cell tile; `image_labels` holds a '|'-separated string of class ids.
df = pd.read_csv(path/'cell_df.csv')

# Add one 0/1 indicator column per class id ("0" .. "18"), parsed from `image_labels`.
labels = [str(i) for i in range(19)]
for x in labels: df[x] = df['image_labels'].apply(lambda r: int(x in r.split('|')))


# Shuffle the rows reproducibly (fixed random_state) and renumber the index from 0.
dfs = df.sample(frac=1, random_state=42)
dfs = dfs.reset_index(drop=True)

# Per-class frequency table over the shuffled frame `dfs`.
#   unique_count: rows whose label string is exactly this one class
#   full_count:   rows whose '|'-separated label list contains this class
unique_counts = {lbl: int((dfs['image_labels'] == lbl).sum()) for lbl in labels}

label_lists = dfs['image_labels'].str.split('|')
full_counts = {
    lbl: int(label_lists.apply(lambda parts, lbl=lbl: lbl in parts).sum())
    for lbl in labels
}

# Most frequent class first; sorted() is stable, so ties keep the order of `labels`.
ordered_labels = sorted(labels, key=lambda lbl: -full_counts[lbl])

# Build the frame straight from Python ints. Passing the rows through np.array
# (as was done before) coerced every count to a string.
counts = pd.DataFrame({
    'label': ordered_labels,
    'full_count': [full_counts[lbl] for lbl in ordered_labels],
    'unique_count': [unique_counts[lbl] for lbl in ordered_labels],
})
counts.set_index('label').T

nfold = 5
seed = 42

# Multi-label targets (one 0/1 column per class) and a placeholder feature matrix
# for the stratified splitter.
y = dfs[labels].values
X = dfs[['image_id', 'cell_id']].values

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# Collect fold ids in a standalone array and attach them by column name.
# Writing through `dfs.iloc[test_index, -1]` targeted "whatever column is last",
# which is `is_valid` (not `fold`) when this cell is run a second time.
fold_ids = np.full(len(dfs), -1, dtype=int)
mskf = MultilabelStratifiedKFold(n_splits=nfold, random_state=seed,shuffle=True)
for i, (_, test_index) in enumerate(mskf.split(X, y)):
    fold_ids[test_index] = i
assert (fold_ids >= 0).all(), "every row must be assigned to a fold"

dfs['fold'] = fold_ids
# Fold 0 is the validation split. Assigning the whole column avoids the chained
# assignment `dfs['is_valid'][mask] = True`, which may write to a temporary copy.
dfs['is_valid'] = dfs['fold'] == 0

dfs.is_valid.value_counts()

def get_x(r):
    """Return the tile file `<path>/cells/<image_id>_<cell_id>.jpg` for row `r`.

    `path` is read from the notebook globals at call time.
    """
    tile_name = r['image_id'] + '_' + str(r['cell_id']) + '.jpg'
    return path / 'cells' / tile_name
def get_y(r):
    """Return the class ids of row `r` as a list of strings, split on '|'."""
    label_string = r['image_labels']
    return label_string.split('|')
get_y(dfs.loc[12])

# (mean, std) triples handed to Normalize.from_stats below.
sample_stats = ([0.07237246, 0.04476176, 0.07661699], [0.17179589, 0.10284516, 0.14199627])

# Per-item square random crop, then batch-level augmentation + normalisation.
# NOTE(review): items are cropped to 640 px but aug_transforms resizes to size=128 — confirm this is intended.
item_tfms = RandomResizedCrop(640, min_scale=0.75, ratio=(1.,1.))
batch_tfms = [*aug_transforms(flip_vert=True, size=128, max_warp=0), Normalize.from_stats(*sample_stats)]
bs=256

# Multi-label image classification over `labels`; the train/valid split comes
# from the boolean `is_valid` column of the frame.
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock(vocab=labels)),
                splitter=ColSplitter(col='is_valid'),
                get_x=get_x,
                get_y=get_y,
                item_tfms=item_tfms,
                batch_tfms=batch_tfms
                )
dls = dblock.dataloaders(dfs, bs=bs)

dls.show_batch(nrows=3, ncols=3)

# First ensemble member: ResNet50 learner in mixed precision.
learn0 = cnn_learner(dls, resnet50, metrics=[accuracy_multi, PrecisionMulti()]).to_fp16()

def get_learner(lr=1e-3):
    """Build a fastai `Learner` around an AdvProp-pretrained EfficientNet-B4.

    Args:
        lr: learning rate bound into the Adam optimizer factory.

    Returns:
        A mixed-precision (`to_fp16`) Learner over the global `dls`, reporting
        `accuracy_multi` and `PrecisionMulti`.
    """
    opt_func = partial(Adam, lr=lr, wd=0.01, eps=1e-8)

    model = EfficientNet.from_pretrained("efficientnet-b4", advprop=True)

    # Replace the classification head. Its input width is read from the backbone
    # (1792 for B4) rather than hard-coded, so switching the variant string above
    # no longer needs a matching manual edit here.
    model._fc = nn.Linear(model._fc.in_features, dls.c)

    learn = Learner(
        dls, model, opt_func=opt_func,
        metrics=[accuracy_multi, PrecisionMulti()]
        ).to_fp16()
    return learn

# Second ensemble member: the EfficientNet-B4 learner.
learn1=get_learner()

# Learner.load("name") is given a bare name here, so the checkpoints are staged
# into ./models under distinct file names.
!mkdir ./models

# ResNet50 weights -> ./models/trained_model0.pth
!cp ../input/fastai-cell-tile-prototyping-th2/models/trained_model.pth ./models
!mv ./models/trained_model.pth ./models/trained_model0.pth
learn0 = learn0.load("trained_model0")

# EfficientNet-B4 weights -> ./models/trained_model1.pth
!cp ../input/fastai-cell-tile-prototyping-th2-effb4/models/trained_model.pth ./models
!mv ./models/trained_model.pth ./models/trained_model1.pth
learn1 = learn1.load("trained_model1")


In [None]:
# Commit options: choose between a short debug run and the full test-set run.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

sub = pd.read_csv("../input/hpa-single-cell-image-classification/sample_submission.csv")
# All working frames start out as the same sample-submission table.
df = data_df = test_df = sub
commit = 12

# A 559-row sample submission is treated as a debug/commit run (presumably the
# public test set — TODO confirm); only the first `commit` images are processed.
debug = len(sub) == 559
if debug:
    sub, df, data_df = sub[:commit], df[:commit], data_df[:commit]

# Images handled per segmentation/inference batch. The original note warns that
# too large a value causes memory problems.
bs = 500
    

In [None]:
#hpacellsegmentatormaster install

!pip install ../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master/
!pip install ../input/hpapytorchzoozip/pytorch_zoo-master/
!pip install ../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl

In [None]:
def sub_process(cell_df0,preds):
    """Attach per-cell class predictions to `cell_df0` as a `cls` column.

    Args:
        cell_df0: DataFrame with one row per cell, in the same order as the
            rows of `preds`. It is modified in place and also returned.
        preds: 2-D tensor of scores, one row per cell, one column per class.

    Returns:
        `cell_df0`. Each entry of its `cls` column is a list of
        `(class_index, score)` tuples: every class scoring above the threshold,
        or the single arg-max class when none does.

    Raises:
        ValueError: if `preds` and `cell_df0` have different numbers of rows.
    """
    threshold = 0.0

    n_rows = preds.shape[0]
    if n_rows != len(cell_df0):
        raise ValueError(
            "preds has %d rows but cell_df0 has %d" % (n_rows, len(cell_df0)))

    cls_column = []
    for i in range(n_rows):
        scores = preds[i]
        # flatten() always yields a 1-D index list, so a single hit needs no
        # special-casing (squeeze() used to collapse it to a bare scalar).
        hits = torch.nonzero(scores > threshold).flatten().tolist()
        if hits:
            cls = [(x, scores[x].item()) for x in hits]
        else:
            cls = [(scores.argmax().item(), scores.max().item())]
        cls_column.append(cls)

    # Assign the whole column at once. The previous per-row
    # `cell_df0['cls'].loc[i] = ...` was chained assignment, which may write to
    # a temporary copy, and it assumed a 0..n-1 index.
    cell_df0['cls'] = pd.Series(cls_column, index=cell_df0.index, dtype=object)

    return cell_df0

In [None]:
def ensemble_func(cell_df,cell_df1, weights=(0.6, 0.4)):
    """Blend the per-cell class scores of two models.

    Args:
        cell_df: frame whose `cls` column holds lists of (class, score) pairs
            from the first model; its class set defines the output classes.
        cell_df1: same layout, second model, rows in the same order.
        weights: (first-model weight, second-model weight). The default
            reproduces the original fixed 0.6 / 0.4 blend.

    Returns:
        A copy of `cell_df` whose `cls` entries are lists of
        `[class_as_float, blended_score]` (the same nested-list shape as before).
        A class the second model did not report contributes 0.0 from that model.
    """
    w0, w1 = weights
    test=cell_df.copy()

    blended_rows = []
    for cls0, cls1 in zip(cell_df['cls'].tolist(), cell_df1['cls'].tolist()):
        # Match scores by class id. The previous positional blend silently mixed
        # scores of different classes whenever the two lists were not identical
        # in order and length.
        second = {int(c): float(p) for c, p in cls1}
        blended_rows.append(
            [[float(c), w0 * float(p) + w1 * second.get(int(c), 0.0)] for c, p in cls0]
        )

    # One column assignment instead of chained `test.cls[i] = ...` writes.
    test['cls'] = pd.Series(blended_rows, index=test.index, dtype=object)

    return test

In [None]:
# Segmentation, cropping into single cells, and inference

import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei
from tqdm import tqdm
import os
import numpy as np
import pandas as pd

# Weight files for the nuclei and cell segmentation networks.
NUC_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth"
CELL_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth"
# One shared segmentator instance, used by get_segment_mask() for every test image.
# It is created on the GPU (device="cuda").
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device="cuda",
    padding=True,
    multi_channel_model=True,
)


def get_segment_mask(data_id, root='../input/hpa-single-cell-image-classification/test/'):
    """Segment one image and return its labelled `(nuclei_mask, cell_mask)`.

    Args:
        data_id: image id; the red, yellow and blue channel files are looked up
            as `<root>/<data_id>_<channel>.png`.
        root: directory holding the channel images.
    """
    red_paths, yellow_paths, blue_paths = (
        [os.path.join(root, f'{data_id}_{channel}.png')]
        for channel in ('red', 'yellow', 'blue')
    )
    # Nuclei are predicted from the blue channel alone; cells from all three.
    nuclei_pred = segmentator.pred_nuclei(blue_paths)
    cell_pred = segmentator.pred_cells([red_paths, yellow_paths, blue_paths])
    nuclei_mask, cell_mask = label_cell(nuclei_pred[0], cell_pred[0])
    return nuclei_mask, cell_mask

from fastai.vision.all import *
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


# NOTE: rebinds the global `path` set in the first cell; get_x() reads `path` at call time.
path = Path('../input/hpa-single-cell-image-classification')
# Base directory used by read_img() to build channel file names.
ROOT = '../input/hpa-single-cell-image-classification/'
# Sub-directory of ROOT that the images are read from.
train_or_test = 'test'

def get_cropped_cell(img, msk):
    """Zero out everything outside `msk` and crop `img` to the mask's bounding box.

    Args:
        img: image array with a trailing channel axis.
        msk: per-pixel mask for one cell (cast to int, broadcast over channels).

    Returns:
        The masked image restricted to the rows and columns spanned by the mask.
    """
    weights = msk.astype(int)[..., None]
    coords = np.argwhere(weights)
    lo, hi = coords.min(axis=0), coords.max(axis=0)
    row_span = slice(lo[0], hi[0] + 1)
    col_span = slice(lo[1], hi[1] + 1)
    return (img * weights)[row_span, col_span]

def get_stats(cropped_cell):
    """Return per-channel `(mean, mean of squares)` of a crop scaled by 1/255.

    The crop is flattened to rows of 3 channel values before averaging.
    """
    pixels = (cropped_cell / 255.0).reshape(-1, 3)
    return pixels.mean(0), (pixels ** 2).mean(0)

def read_img(image_id, color, train_or_test='test', image_size=None):
    """Load one channel image `<ROOT>/<train_or_test>/<image_id>_<color>.png`.

    Args:
        image_id: image identifier.
        color: channel name used in the file name.
        train_or_test: sub-directory of ROOT to read from.
        image_size: if given, the image is resized to a square of this side.

    Returns:
        The image array; when its maximum exceeds 255 it is divided by 255 and
        cast to uint8.
    """
    filename = f'{ROOT}/{train_or_test}/{image_id}_{color}.png'
    assert os.path.exists(filename), f'not found {filename}'

    img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if image_size is not None:
        img = cv2.resize(img, (image_size, image_size))

    # NOTE(review): dividing 16-bit data by 255 can give values above 255 before
    # the uint8 cast. Left unchanged here; presumably the models were trained on
    # tiles produced the same way — confirm before altering.
    return (img / 255).astype('uint8') if img.max() > 255 else img

import base64
import numpy as np
from pycocotools import _mask as coco_mask
import typing as t
import zlib


def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  Args:
    mask: boolean array that is 2-D once singleton axes are squeezed out.

  Returns:
    The COCO RLE counts of the mask, zlib-compressed and base64-encoded,
    as an ASCII string.

  Raises:
    ValueError: if `mask` is not of boolean dtype or is not 2-D after squeezing.
  """

  # check input mask --
  # `np.bool` was a deprecated alias removed in NumPy 1.24; `np.bool_` is the
  # actual boolean scalar type and works on old and new releases alike.
  if mask.dtype != np.bool_:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: `"%s" % (2, 3, 4)` would raise TypeError
    # instead of producing the intended ValueError message.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str.decode('ascii')

num_files = len(df)
sub_full = pd.DataFrame(columns=["ID","ImageWidth","ImageHeight","PredictionString"])

data_size = len(data_df)

for i in range(0, data_size, bs):
    print('!!!!', i, '!!!!')
    
    x_tot,x2_tot = [],[]
    all_cells = []
    
    start = i
    end = min(len(data_df), start + bs)
    test_df = data_df[start:end]

    root = './temp/cells'
    os.makedirs(root, exist_ok=True)

    print('---- start mask write ----')
    for image_id in tqdm(test_df.ID.to_list()):
        nuc, cell = get_segment_mask(image_id)

        red = read_img(image_id, "red", train_or_test, None)#512
        green = read_img(image_id, "green", train_or_test, None)
        blue = read_img(image_id, "blue", train_or_test, None)
        cell_mask = cv2.resize(cell,red.shape,interpolation=cv2.INTER_NEAREST)#cell##np.load(f'/kaggle/working/test_mask/{image_id}_cell.npy')
        #yellow = read_img(image_id, "yellow", train_or_test, image_size)
        stacked_image = np.transpose(np.array([blue, green, red]), (1,2,0))

        for j in range(1, np.max(cell_mask)):
       
            bmask = (cell_mask == j)
            enc = encode_binary_mask(bmask)
            cropped_cell = get_cropped_cell(stacked_image, bmask)
            fname = f'{image_id}_{j}.jpg'
            ##im = cv2.imencode('.jpg', cropped_cell)[1]
            ##img_out.writestr(fname, im)
            cv2.imwrite(f'./temp/cells/{fname}', cropped_cell)
            x, x2 = get_stats(cropped_cell)
            x_tot.append(x)
            x2_tot.append(x2)
            all_cells.append({
                'image_id': image_id,
                'fname': fname,
                'r_mean': x[0],
                'g_mean': x[1],
                'b_mean': x[2],
                'cell_id': j,
                'size1': cropped_cell.shape[0],
                'size2': cropped_cell.shape[1],
                'enc': enc,
            })
      
    path = Path('/kaggle/working/temp')

    cell_df = pd.DataFrame(all_cells)###
    test_dl0 = learn0.dls.test_dl(cell_df)
    test_dl1 = learn1.dls.test_dl(cell_df)

    preds0, _ = learn0.tta(dl=test_dl0,n=3)
    preds1, _ = learn1.tta(dl=test_dl1,n=2)
    ##preds1=torch.tensor(seresnext50(cell_df))
    
    !rm /kaggle/working/temp/cells/* 
    !rmdir /kaggle/working/temp/cells  
    !rmdir /kaggle/working/temp
    
    print('---- finish mask write ----')

    sub0=sub_process(cell_df,preds0)
    cell_df1=cell_df.copy()
    sub1=sub_process(cell_df1,preds1)
    
    cell_df0=ensemble_func(sub0,sub1)
    
    def combine(r):
        cls = r[0]
        enc = r[1]
        classes = [str(int(c[0])) + ' ' + str(c[1]) + ' ' + enc for c in cls]
        return ' '.join(classes)

    cell_df0['pred'] = cell_df0[['cls', 'enc']].apply(combine, axis=1)

    subm = cell_df0.groupby(['image_id'])['pred'].apply(lambda x: ' '.join(x)).reset_index()

    sample_submission = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')

    sub = pd.merge(
        sample_submission,
        subm,
        how="left",
        left_on='ID',
        right_on='image_id',
    )

    def isNaN(num):
        return num != num

    for i, row in sub.iterrows():
        if isNaN(row['pred']): continue
        sub.PredictionString.loc[i] = row['pred']

    sub = sub[sample_submission.columns]
    sub_full=pd.concat([sub_full,sub[start:end]])
sub_full.to_csv('submission0.csv', index=False)
    
ss_df=sub_full     


In [None]:
!pip install /kaggle/input/kerasapplications -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

import tensorflow as tf; print(f"\t\t– TENSORFLOW VERSION: {tf.__version__}");
import pandas as pd; pd.options.mode.chained_assignment = None;
import numpy as np; print(f"\t\t– NUMPY VERSION: {np.__version__}");
import torch

import pandas as pd
import os

import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import tensorflow as tf

# Built In Imports
from collections import Counter
from datetime import datetime
import multiprocessing
from glob import glob
import warnings
import requests
import imageio
import IPython
import urllib
import zipfile
import pickle
import random
import shutil
import string
import math
import tqdm
import time
import gzip
import sys
import ast
import csv; csv.field_size_limit(sys.maxsize)
import io
import os
import gc
import re

# Visualization Imports
from matplotlib.colors import ListedColormap
import matplotlib.patches as patches
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import plotly.express as px
import seaborn as sns
from PIL import Image
import matplotlib; print(f"\t\t– MATPLOTLIB VERSION: {matplotlib.__version__}");
import plotly
import PIL
import cv2
import typing as t
import base64
import zlib


In [None]:
#Image Level Model SEResNext50_32x4d

def seres_imlevel():
    """Run image-level inference with the SE-ResNeXt50 fold checkpoints.

    Reads the sample submission for the list of test image ids, loads the green
    channel file of each image, and averages the sigmoid outputs of the
    checkpoints listed in `CFG.trn_fold`.

    Returns:
        The sample-submission DataFrame with one extra column per entry of
        `CFG.target_cols` ('0'..'18') holding the averaged class probabilities.

    Raises:
        FileNotFoundError: if a test image cannot be read.
    """
    import os
    import random
    import sys
    import warnings

    # ====================================================
    # Directory settings
    # ====================================================
    MODEL_DIR = '../input/seres-imlevel-folds/'
    OUTPUT_DIR = './'
    TEST_PATH = '../input/hpa-single-cell-image-classification/test'
    TIMM_DIR = '../input/pytorch-image-models/pytorch-image-models-master'

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ====================================================
    # CFG
    # ====================================================
    class CFG:
        debug=False
        num_workers=4
        model_name='seresnext50_32x4d'
        size=640
        batch_size=16
        seed=42
        target_size=19
        target_cols=['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18']

        n_fold=4
        trn_fold=[0, 1, 2]

    # ====================================================
    # Library
    # ====================================================
    # timm is loaded from an attached dataset; avoid growing sys.path on every call.
    if TIMM_DIR not in sys.path:
        sys.path.append(TIMM_DIR)

    # Only the names actually used are imported. The previous blanket import
    # list pulled in albumentations transforms that newer releases no longer
    # ship, which made the whole function fail at import time.
    import cv2
    import numpy as np
    import pandas as pd
    import timm
    import torch
    import torch.nn as nn
    from albumentations import Compose, Normalize, Resize
    from albumentations.pytorch import ToTensorV2
    from torch.utils.data import DataLoader, Dataset
    from tqdm.auto import tqdm

    warnings.filterwarnings('ignore')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ====================================================
    # Utils
    # ====================================================
    def init_logger(log_file=OUTPUT_DIR+'inference.log'):
        """Return the module logger, attaching stream and file handlers only once."""
        from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
        logger = getLogger(__name__)
        logger.setLevel(INFO)
        # Without this guard each call stacked another pair of handlers on the
        # shared logger, duplicating every message.
        if not logger.handlers:
            for handler in (StreamHandler(), FileHandler(filename=log_file)):
                handler.setFormatter(Formatter("%(message)s"))
                logger.addHandler(handler)
        return logger

    # Kept so that ./inference.log is still created, as before.
    init_logger()

    def seed_torch(seed=42):
        """Seed Python, NumPy and torch RNGs and make cuDNN deterministic."""
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    seed_torch(seed=CFG.seed)

    test = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')
    print(test.shape)

    if CFG.debug:
        test = test.head()

    # ====================================================
    # Dataset
    # ====================================================
    class TestDataset(Dataset):
        """Yields the transformed green-channel image for each row's `ID`."""

        def __init__(self, df, transform=None):
            self.df = df
            self.file_names = df['ID'].values
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            file_name = self.file_names[idx]
            file_path = f'{TEST_PATH}/{file_name}_green.png'
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None; fail with the path
                # instead of an opaque cvtColor error.
                raise FileNotFoundError(f'could not read {file_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            if self.transform:
                image = self.transform(image=image)['image']
            return image

    # ====================================================
    # Transforms
    # ====================================================
    # Evaluation-time pipeline: resize, normalise, convert to a CHW tensor.
    eval_transform = Compose([
        Resize(CFG.size, CFG.size),
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

    # ====================================================
    # MODEL
    # ====================================================
    class CustomResNext(nn.Module):
        """timm backbone whose `fc` head is replaced by a CFG.target_size-way linear layer."""

        def __init__(self, model_name='resnext50_32x4d', pretrained=False):
            super().__init__()
            self.model = timm.create_model(model_name, pretrained=pretrained)
            n_features = self.model.fc.in_features
            self.model.fc = nn.Linear(n_features, CFG.target_size)

        def forward(self, x):
            return self.model(x)

    # ====================================================
    # Helper functions
    # ====================================================
    def inference(model, states, test_loader, device):
        """Return fold-averaged sigmoid probabilities for every image in `test_loader`."""
        model.to(device)
        probs = []
        for images in tqdm(test_loader, total=len(test_loader)):
            images = images.to(device)
            fold_preds = []
            # Each batch is scored by every fold checkpoint in turn.
            for state in states:
                model.load_state_dict(state['model'])
                model.eval()
                with torch.no_grad():
                    y_preds = model(images)
                fold_preds.append(y_preds.sigmoid().to('cpu').numpy())
            probs.append(np.mean(fold_preds, axis=0))
        return np.concatenate(probs)

    # ====================================================
    # inference
    # ====================================================
    model = CustomResNext(CFG.model_name, pretrained=False)
    states = [torch.load(MODEL_DIR+f'{CFG.model_name}_fold{fold}_best.pth') for fold in CFG.trn_fold]
    test_dataset = TestDataset(test, transform=eval_transform)
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                             num_workers=CFG.num_workers, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    # submission
    test[CFG.target_cols] = predictions

    return test

In [None]:
#Image Level Model EFF B3 not used
def eff_imlevel():
    """Run image-level inference with the EfficientNet-B3 fold checkpoints.

    Reads the sample submission for the list of test image ids, loads the green
    channel file of each image, and averages the sigmoid outputs of the
    checkpoints listed in `CFG.trn_fold`.

    Returns:
        The sample-submission DataFrame with one extra column per entry of
        `CFG.target_cols` ('0'..'18') holding the averaged class probabilities.

    Raises:
        FileNotFoundError: if a test image cannot be read.
    """
    import os
    import random
    import sys
    import warnings

    # ====================================================
    # Directory settings
    # ====================================================
    MODEL_DIR = '../input/effb3-image-level/'
    OUTPUT_DIR = './'
    TEST_PATH = '../input/hpa-single-cell-image-classification/test'
    TIMM_DIR = '../input/pytorch-image-models/pytorch-image-models-master'

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ====================================================
    # CFG
    # ====================================================
    class CFG:
        debug=False
        num_workers=4
        model_name='tf_efficientnet_b3'
        size=640
        batch_size=8
        seed=42
        target_size=19
        target_cols=['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18']

        n_fold=4
        trn_fold=[0, 1, 2]

    # ====================================================
    # Library
    # ====================================================
    # timm is loaded from an attached dataset; avoid growing sys.path on every call.
    if TIMM_DIR not in sys.path:
        sys.path.append(TIMM_DIR)

    # Only the names actually used are imported. The previous blanket import
    # list pulled in albumentations transforms that newer releases no longer
    # ship, which made the whole function fail at import time.
    import cv2
    import numpy as np
    import pandas as pd
    import timm
    import torch
    import torch.nn as nn
    from albumentations import Compose, Normalize, Resize
    from albumentations.pytorch import ToTensorV2
    from torch.utils.data import DataLoader, Dataset
    from tqdm.auto import tqdm

    warnings.filterwarnings('ignore')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ====================================================
    # Utils
    # ====================================================
    def init_logger(log_file=OUTPUT_DIR+'inference.log'):
        """Return the module logger, attaching stream and file handlers only once."""
        from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
        logger = getLogger(__name__)
        logger.setLevel(INFO)
        # Without this guard each call stacked another pair of handlers on the
        # shared logger, duplicating every message.
        if not logger.handlers:
            for handler in (StreamHandler(), FileHandler(filename=log_file)):
                handler.setFormatter(Formatter("%(message)s"))
                logger.addHandler(handler)
        return logger

    # Kept so that ./inference.log is still created, as before.
    init_logger()

    def seed_torch(seed=42):
        """Seed Python, NumPy and torch RNGs and make cuDNN deterministic."""
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    seed_torch(seed=CFG.seed)

    test = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')
    print(test.shape)

    if CFG.debug:
        test = test.head()

    # ====================================================
    # Dataset
    # ====================================================
    class TestDataset(Dataset):
        """Yields the transformed green-channel image for each row's `ID`."""

        def __init__(self, df, transform=None):
            self.df = df
            self.file_names = df['ID'].values
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            file_name = self.file_names[idx]
            file_path = f'{TEST_PATH}/{file_name}_green.png'
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None; fail with the path
                # instead of an opaque cvtColor error.
                raise FileNotFoundError(f'could not read {file_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            if self.transform:
                image = self.transform(image=image)['image']
            return image

    # ====================================================
    # Transforms
    # ====================================================
    # Evaluation-time pipeline: resize, normalise, convert to a CHW tensor.
    eval_transform = Compose([
        Resize(CFG.size, CFG.size),
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

    # ====================================================
    # MODEL
    # ====================================================
    class CustomEfficientNet(nn.Module):
        """timm backbone whose `classifier` head is replaced by a CFG.target_size-way linear layer."""

        def __init__(self, model_name=CFG.model_name, pretrained=False):
            super().__init__()
            # Honour the `model_name` argument; it used to be ignored in favour
            # of CFG.model_name (the same value at the only call site).
            self.model = timm.create_model(model_name, pretrained=pretrained)
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Linear(n_features, CFG.target_size)

        def forward(self, x):
            return self.model(x)

    # ====================================================
    # Helper functions
    # ====================================================
    def inference(model, states, test_loader, device):
        """Return fold-averaged sigmoid probabilities for every image in `test_loader`."""
        model.to(device)
        probs = []
        for images in tqdm(test_loader, total=len(test_loader)):
            images = images.to(device)
            fold_preds = []
            # Each batch is scored by every fold checkpoint in turn.
            for state in states:
                model.load_state_dict(state['model'])
                model.eval()
                with torch.no_grad():
                    y_preds = model(images)
                fold_preds.append(y_preds.sigmoid().to('cpu').numpy())
            probs.append(np.mean(fold_preds, axis=0))
        return np.concatenate(probs)

    # ====================================================
    # inference
    # ====================================================
    model = CustomEfficientNet(CFG.model_name, pretrained=False)
    states = [torch.load(MODEL_DIR+f'{CFG.model_name}_fold{fold}_best.pth') for fold in CFG.trn_fold]
    test_dataset = TestDataset(test, transform=eval_transform)
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                             num_workers=CFG.num_workers, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    # submission
    test[CFG.target_cols] = predictions

    return test

In [None]:
im1=seres_imlevel()

In [None]:
##im2=eff_imlevel()

In [None]:
# Image Level Model EFF B7 (TPU) and Inference
#
# The row count of the sample submission decides which branch of the `if`
# that follows is executed:
#   * exactly 559 rows -> the `else` branch, which works on a copy of `ss_df`
#     and only uses the first `commit` rows of `im1`;
#   * any other count  -> the `if` branch, which uses the full sample
#     submission and all of `im1`.
# NOTE(review): 559 is presumably the number of images visible outside the
# hidden-test rerun -- confirm against the competition data.

sub_df = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')

if sub_df.shape[0] != 559:
    def auto_select_accelerator():
        """Return a `tf.distribute` strategy, using a TPU when one can be set up.

        The TPU path resolves the cluster, connects to it, initialises the TPU
        system and wraps it in a ``TPUStrategy``. If any of those steps raises
        ``ValueError`` the strategy from ``tf.distribute.get_strategy()`` is
        used instead. The replica count is printed in both cases.
        """
        try:
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            chosen = tf.distribute.experimental.TPUStrategy(resolver)
            print("Running on TPU:", resolver.master())
        except ValueError:
            # TPU setup failed: fall back to whatever strategy TF hands out.
            chosen = tf.distribute.get_strategy()

        replica_count = chosen.num_replicas_in_sync
        print(f"Running on {replica_count} replicas")
        return chosen


    def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
        """Create a function that turns a file path into a resized float image.

        Args:
            with_labels: When true the returned function takes ``(path, label)``
                and passes the label through; otherwise it takes ``path`` only.
            target_size: Size handed to ``tf.image.resize``.
            ext: ``'png'`` selects ``decode_png``; ``'jpg'``/``'jpeg'`` select
                ``decode_jpeg``. Anything else makes the returned function
                raise ``ValueError`` when it is called.

        Returns:
            The decoding callable described above.
        """
        def decode(path):
            raw = tf.io.read_file(path)
            if ext == 'png':
                pixels = tf.image.decode_png(raw, channels=3)
            elif ext in ['jpg', 'jpeg']:
                pixels = tf.image.decode_jpeg(raw, channels=3)
            else:
                raise ValueError("Image extension not supported")

            # Cast to float32 and divide by 255 before resizing.
            scaled = tf.cast(pixels, tf.float32) / 255.0
            return tf.image.resize(scaled, target_size)

        def decode_with_labels(path, label):
            return decode(path), label

        if with_labels:
            return decode_with_labels
        return decode


    def build_augmenter(with_labels=True):
        """Create a random-flip augmentation function.

        The returned callable applies ``tf.image.random_flip_left_right``
        followed by ``tf.image.random_flip_up_down``. With ``with_labels`` it
        takes ``(img, label)`` and returns the label unchanged next to the
        augmented image; without it, it takes and returns the image only.
        """
        def augment(img):
            flipped = tf.image.random_flip_left_right(img)
            return tf.image.random_flip_up_down(flipped)

        def augment_with_labels(img, label):
            return augment(img), label

        if not with_labels:
            return augment
        return augment_with_labels


    def build_dataset(paths, labels=None, bsize=32, cache=True,
                      decode_fn=None, augment_fn=None,
                      augment=True, repeat=True, shuffle=1024,
                      cache_dir=""):
        """Assemble a batched, prefetched `tf.data` pipeline from file paths.

        Stages, in order: decode -> cache -> augment -> repeat -> shuffle ->
        batch -> prefetch. The cache, augment, repeat and shuffle stages are
        each skipped when their flag is falsy.

        Args:
            paths: Items to slice the dataset from.
            labels: Optional labels sliced alongside ``paths``.
            bsize: Batch size.
            cache: Whether to insert ``cache(cache_dir)``.
            decode_fn: Mapping applied first; defaults to ``build_decoder(...)``.
            augment_fn: Augmentation mapping; defaults to ``build_augmenter(...)``.
            augment: Whether to apply ``augment_fn``.
            repeat: Whether to repeat the dataset indefinitely.
            shuffle: Shuffle buffer size, or a falsy value to disable shuffling.
            cache_dir: Cache location; created if non-empty and ``cache is True``.

        Returns:
            The resulting dataset object.
        """
        has_labels = labels is not None

        if cache_dir != "" and cache is True:
            os.makedirs(cache_dir, exist_ok=True)

        # Default helpers are chosen to match whether labels are present.
        if decode_fn is None:
            decode_fn = build_decoder(has_labels)
        if augment_fn is None:
            augment_fn = build_augmenter(has_labels)

        autotune = tf.data.experimental.AUTOTUNE
        source = (paths, labels) if has_labels else paths

        pipeline = tf.data.Dataset.from_tensor_slices(source)
        pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
        if cache:
            pipeline = pipeline.cache(cache_dir)
        if augment:
            pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
        if repeat:
            pipeline = pipeline.repeat()
        if shuffle:
            pipeline = pipeline.shuffle(shuffle)

        return pipeline.batch(bsize).prefetch(autotune)

    COMPETITION_NAME = "hpa-single-cell-image-classification"
    strategy = auto_select_accelerator()
    BATCH_SIZE = strategy.num_replicas_in_sync * 16  # 16 images per replica

    # Candidate square input sizes; IMSIZE[7] (600) is the one used below.
    IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 640)

    load_dir = f"/kaggle/input/{COMPETITION_NAME}/"

    # `sub_df` already holds the sample submission read at the top of this
    # cell, so it is not read from disk a second time. Keep only its first
    # column and add one zero-filled score column per class ('0'..'18').
    sub_df = sub_df.drop(sub_df.columns[1:],axis=1)

    for i in range(19):
        sub_df[f'{i}'] = pd.Series(np.zeros(sub_df.shape[0]))

    # Only the green channel is scored here. Commented-out alternatives in
    # earlier text of this cell used the `_blue.png`, `_red.png` and
    # `_yellow.png` files instead.
    test_paths = load_dir + "/test/" + sub_df['ID'] + '_green.png'
    label_cols = sub_df.columns[1:]
    n_classes = len(label_cols)

    # The files are PNGs, so ask for the PNG decoder explicitly instead of
    # relying on the 'jpg' default of build_decoder.
    test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[7], IMSIZE[7]), ext='png')
    dtest = build_dataset(
        test_paths, bsize=BATCH_SIZE, repeat=False,
        shuffle=False, augment=False, cache=False,
        decode_fn=test_decoder
    )

    with strategy.scope():
        # Commented-out alternatives listed here before:
        #   ../input/hpa-classification-efnb7-train/model_green.h5
        #   ../input/hpa-classification-efnb7-train-blue/model_blue.h5
        #   ../input/hpa-classification-efnb7-train-red/model_red.h5
        #   ../input/hpa-classification-efnb7-train-yellow/model_yellow.h5
        model = tf.keras.models.load_model(
            '../input/hpaclassificationefnb7train2weight/model_green.h5'
        )

    model.summary()

    im2=model.predict(dtest, verbose=1)
    # Equal-weight average of the two image-level models.
    sub_df[label_cols]=0.5*im1[label_cols]+0.5*im2

    # Attach the image-level scores to the per-image prediction strings.
    ss_df = pd.merge(ss_df, sub_df, on = 'ID', how = 'left')

    # Rows holding exactly this string are left untouched (presumably the
    # placeholder for images without detected cells -- TODO confirm).
    empty_prediction = '0 1 eNoLCAgIMAEABJkBdQ=='

    for i in range(ss_df.shape[0]):
        pred_str = ss_df.loc[i,'PredictionString']
        if pred_str == empty_prediction:
            continue
        tokens = pred_str.split()
        # Tokens are consumed in groups of three: class index, confidence and
        # a third token that is passed through unchanged. Only the confidence
        # is rewritten, as the 50/50 mean of itself and the image-level score
        # of the same class.
        for start in range(0, 3 * (len(tokens) // 3), 3):
            cls_idx = int(tokens[start])
            if 0 <= cls_idx < n_classes:
                cell_conf = tokens[start + 1]
                tokens[start + 1] = str(ss_df.loc[i,f'{cls_idx}'] * 0.5 + float(cell_conf) * 0.5)

        ss_df.loc[i,'PredictionString'] = ' '.join(tokens)

    ss_df = ss_df[['ID','ImageWidth','ImageHeight','PredictionString']]
    ss_df.to_csv('submission.csv',index = False)
else:
    def auto_select_accelerator():
        """Return a `tf.distribute` strategy, using a TPU when one can be set up.

        The TPU path resolves the cluster, connects to it, initialises the TPU
        system and wraps it in a ``TPUStrategy``. If any of those steps raises
        ``ValueError`` the strategy from ``tf.distribute.get_strategy()`` is
        used instead. The replica count is printed in both cases.
        """
        try:
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            chosen = tf.distribute.experimental.TPUStrategy(resolver)
            print("Running on TPU:", resolver.master())
        except ValueError:
            # TPU setup failed: fall back to whatever strategy TF hands out.
            chosen = tf.distribute.get_strategy()

        replica_count = chosen.num_replicas_in_sync
        print(f"Running on {replica_count} replicas")
        return chosen


    def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
        """Create a function that turns a file path into a resized float image.

        Args:
            with_labels: When true the returned function takes ``(path, label)``
                and passes the label through; otherwise it takes ``path`` only.
            target_size: Size handed to ``tf.image.resize``.
            ext: ``'png'`` selects ``decode_png``; ``'jpg'``/``'jpeg'`` select
                ``decode_jpeg``. Anything else makes the returned function
                raise ``ValueError`` when it is called.

        Returns:
            The decoding callable described above.
        """
        def decode(path):
            raw = tf.io.read_file(path)
            if ext == 'png':
                pixels = tf.image.decode_png(raw, channels=3)
            elif ext in ['jpg', 'jpeg']:
                pixels = tf.image.decode_jpeg(raw, channels=3)
            else:
                raise ValueError("Image extension not supported")

            # Cast to float32 and divide by 255 before resizing.
            scaled = tf.cast(pixels, tf.float32) / 255.0
            return tf.image.resize(scaled, target_size)

        def decode_with_labels(path, label):
            return decode(path), label

        if with_labels:
            return decode_with_labels
        return decode


    def build_augmenter(with_labels=True):
        """Create a random-flip augmentation function.

        The returned callable applies ``tf.image.random_flip_left_right``
        followed by ``tf.image.random_flip_up_down``. With ``with_labels`` it
        takes ``(img, label)`` and returns the label unchanged next to the
        augmented image; without it, it takes and returns the image only.
        """
        def augment(img):
            flipped = tf.image.random_flip_left_right(img)
            return tf.image.random_flip_up_down(flipped)

        def augment_with_labels(img, label):
            return augment(img), label

        if not with_labels:
            return augment
        return augment_with_labels


    def build_dataset(paths, labels=None, bsize=32, cache=True,
                      decode_fn=None, augment_fn=None,
                      augment=True, repeat=True, shuffle=1024,
                      cache_dir=""):
        """Assemble a batched, prefetched `tf.data` pipeline from file paths.

        Stages, in order: decode -> cache -> augment -> repeat -> shuffle ->
        batch -> prefetch. The cache, augment, repeat and shuffle stages are
        each skipped when their flag is falsy.

        Args:
            paths: Items to slice the dataset from.
            labels: Optional labels sliced alongside ``paths``.
            bsize: Batch size.
            cache: Whether to insert ``cache(cache_dir)``.
            decode_fn: Mapping applied first; defaults to ``build_decoder(...)``.
            augment_fn: Augmentation mapping; defaults to ``build_augmenter(...)``.
            augment: Whether to apply ``augment_fn``.
            repeat: Whether to repeat the dataset indefinitely.
            shuffle: Shuffle buffer size, or a falsy value to disable shuffling.
            cache_dir: Cache location; created if non-empty and ``cache is True``.

        Returns:
            The resulting dataset object.
        """
        has_labels = labels is not None

        if cache_dir != "" and cache is True:
            os.makedirs(cache_dir, exist_ok=True)

        # Default helpers are chosen to match whether labels are present.
        if decode_fn is None:
            decode_fn = build_decoder(has_labels)
        if augment_fn is None:
            augment_fn = build_augmenter(has_labels)

        autotune = tf.data.experimental.AUTOTUNE
        source = (paths, labels) if has_labels else paths

        pipeline = tf.data.Dataset.from_tensor_slices(source)
        pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
        if cache:
            pipeline = pipeline.cache(cache_dir)
        if augment:
            pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
        if repeat:
            pipeline = pipeline.repeat()
        if shuffle:
            pipeline = pipeline.shuffle(shuffle)

        return pipeline.batch(bsize).prefetch(autotune)

    COMPETITION_NAME = "hpa-single-cell-image-classification"
    strategy = auto_select_accelerator()
    BATCH_SIZE = strategy.num_replicas_in_sync * 16  # 16 images per replica

    # Candidate square input sizes; IMSIZE[7] (600) is the one used below.
    IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 640)

    load_dir = f"/kaggle/input/{COMPETITION_NAME}/"

    # This branch scores the images listed in `ss_df`. The sample submission
    # used to be re-read here and then immediately overwritten by this copy,
    # so the dead read has been dropped.
    sub_df = ss_df.copy()

    # Keep only the first column and add one zero-filled score column per
    # class ('0'..'18').
    sub_df = sub_df.drop(sub_df.columns[1:],axis=1)

    for i in range(19):
        sub_df[f'{i}'] = pd.Series(np.zeros(sub_df.shape[0]))

    # Only the green channel is scored here. Commented-out alternatives in
    # earlier text of this cell used the `_blue.png`, `_red.png` and
    # `_yellow.png` files instead.
    test_paths = load_dir + "/test/" + sub_df['ID'] + '_green.png'
    label_cols = sub_df.columns[1:]
    n_classes = len(label_cols)

    # The files are PNGs, so ask for the PNG decoder explicitly instead of
    # relying on the 'jpg' default of build_decoder.
    test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[7], IMSIZE[7]), ext='png')
    dtest = build_dataset(
        test_paths, bsize=BATCH_SIZE, repeat=False,
        shuffle=False, augment=False, cache=False,
        decode_fn=test_decoder
    )

    with strategy.scope():
        # Commented-out alternatives listed here before:
        #   ../input/hpa-classification-efnb7-train/model_green.h5
        #   ../input/hpa-classification-efnb7-train-blue/model_blue.h5
        #   ../input/hpa-classification-efnb7-train-red/model_red.h5
        #   ../input/hpa-classification-efnb7-train-yellow/model_yellow.h5
        model = tf.keras.models.load_model(
            '../input/hpaclassificationefnb7train2weight/model_green.h5'
        )

    model.summary()
    im2=model.predict(dtest, verbose=1)
    # Equal-weight average of the two image-level models; only the first
    # `commit` rows of `im1` are used in this branch.
    sub_df[label_cols]=0.5*im1[label_cols][:commit]+0.5*im2

    # Attach the image-level scores to the per-image prediction strings.
    ss_df = pd.merge(ss_df, sub_df, on = 'ID', how = 'left')

    # Rows holding exactly this string are left untouched (presumably the
    # placeholder for images without detected cells -- TODO confirm).
    empty_prediction = '0 1 eNoLCAgIMAEABJkBdQ=='

    for i in range(ss_df.shape[0]):
        pred_str = ss_df.loc[i,'PredictionString']
        if pred_str == empty_prediction:
            continue
        tokens = pred_str.split()
        # Tokens are consumed in groups of three: class index, confidence and
        # a third token that is passed through unchanged. Only the confidence
        # is rewritten, as the 50/50 mean of itself and the image-level score
        # of the same class.
        for start in range(0, 3 * (len(tokens) // 3), 3):
            cls_idx = int(tokens[start])
            if 0 <= cls_idx < n_classes:
                cell_conf = tokens[start + 1]
                tokens[start + 1] = str(ss_df.loc[i,f'{cls_idx}'] * 0.5 + float(cell_conf) * 0.5)

        ss_df.loc[i,'PredictionString'] = ' '.join(tokens)

    ss_df = ss_df[['ID','ImageWidth','ImageHeight','PredictionString']]
    ss_df.to_csv('submission.csv',index = False)