# Library

In [None]:
# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

import random
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
from torchvision import models as tvmodels
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import time
from scipy.special import softmax
import math
from matplotlib.pyplot import imread
import albumentations as A
from albumentations import Compose
from albumentations.pytorch import ToTensorV2
import numpy as np
import cv2
from sklearn.model_selection import GroupKFold, StratifiedKFold

import time
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Config

In [None]:
# ====================================================
# Config
# ====================================================
# Input locations (paths are relative to the notebook's working directory).
DATA_PATH = '../input/cassava-leaf-disease-classification/'
TRAIN_DIR = DATA_PATH + 'train_images/'
TEST_DIR = DATA_PATH + 'test_images/'
# Directory whose files are loaded as CassavaNet state dicts further down.
MODEL_PATH = '../input/cassavanet-baseline-models/'

# Number of augmented copies generated per test image.
N_TTA = 4

# Spatial size of the crop fed to the networks, and the number of colour channels.
HEIGHT = 512
WIDTH = 512
CHANNELS = 3

# Width of every replaced classification layer.
N_CLASSES = 5

# Positions (within the MODEL_PATH directory listing) of the checkpoints to load.
MODEL_LIST = [0,1,2,3,4]

# Per-channel normalisation statistics. The ImageNet values are in use; the
# Cassava-specific alternatives are mean=[0.4303, 0.4967, 0.3134],
# std=[0.2142, 0.2191, 0.1954].
IMG_MEAN = [0.485, 0.456, 0.406] # mean for the Normalize transform (ImageNet)
IMG_STD = [0.229, 0.224, 0.225] # std for the Normalize transform (ImageNet)

# Seed

In [None]:
# ====================================================
# Seed
# ====================================================
def seed_everything(seed):
    """Seed every random number generator used by the PyTorch pipeline.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, which
    # defeats the deterministic flag above, so it has to stay disabled.
    torch.backends.cudnn.benchmark = False

# Fix the seeds once, then select the first GPU when CUDA is available (CPU otherwise).
SEED = 1111
seed_everything(SEED)  
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Model

In [None]:
# ====================================================
# Model
# ====================================================
class CassavaNet(nn.Module):
    """Backbone wrapper whose classification layer is resized to ``N_CLASSES``.

    The backbone is created through ``torch.hub`` for the DeiT variants and
    through ``timm`` for every other model name. For recognised architectures
    the final linear layer is replaced by a fresh ``nn.Linear`` with
    ``N_CLASSES`` outputs; unrecognised architectures are left untouched.

    Args:
        model_name: Architecture name understood by ``timm`` / the DeiT hub.
        pretrained: Forwarded to the backbone factory.
    """

    # DeiT variants are fetched from torch.hub instead of timm.
    _HUB_MODELS = ('deit_base_patch16_224', 'deit_base_patch16_384')
    # Transformer backbones that expose their classification layer as ``head``.
    _HEAD_MODELS = ('vit_large_patch16_384',) + _HUB_MODELS

    def __init__(self, model_name=None, pretrained=False):
        super().__init__()
        self.model_name = model_name
        if model_name in self._HUB_MODELS:
            self.model = torch.hub.load('facebookresearch/deit:main', model_name, pretrained=pretrained)
        else:
            self.model = timm.create_model(model_name, pretrained=pretrained)

        head_attr = self._classifier_attr()
        if head_attr is not None:
            self.n_features = getattr(self.model, head_attr).in_features
            setattr(self.model, head_attr, nn.Linear(self.n_features, N_CLASSES))

    def _classifier_attr(self):
        """Return the backbone attribute holding the classification layer, or None."""
        name = self.model_name
        if 'efficientnet' in name:
            return 'classifier'
        if name in self._HEAD_MODELS:
            return 'head'
        if 'resnext' in name:
            return 'fc'
        return None

    def forward(self, x):
        """Run the wrapped backbone on ``x`` and return its output."""
        return self.model(x)

    def freeze(self):
        """Freeze every backbone parameter except the classification layer."""
        for param in self.model.parameters():
            param.requires_grad = False

        # Use the same head lookup as __init__. The previous condition
        # `name == 'vit...' or 'deit...'` was always truthy, which made the
        # resnext branch unreachable and read `.head` on models without one.
        head_attr = self._classifier_attr()
        if head_attr is not None:
            for param in getattr(self.model, head_attr).parameters():
                param.requires_grad = True

    def unfreeze(self):
        """Make every backbone parameter trainable again."""
        for param in self.model.parameters():
            param.requires_grad = True

# Dataset

In [None]:
# ====================================================
# Dataset
# ====================================================
class GetData(Dataset):
    """Dataset that reads one image file per item from a directory.

    What ``__getitem__`` returns depends on the ``Type`` string:

    * contains ``"train"``  -> ``(train_transforms(image)['image'], label)``
    * contains ``"valid"``  -> ``(valid_transforms(image)['image'], label)``
    * contains ``"tr-tst"`` -> ``(raw image, label)``
    * contains ``"test"``   -> ``(raw image, file name)``

    The checks run in that order; an unmatched type yields ``None``.
    ``train_transforms`` and ``valid_transforms`` are looked up as module
    globals when an item is fetched.
    """

    def __init__(self, Dir, FNames, labels,Type):
        self.dir, self.fnames = Dir, FNames
        self.lbs, self.type = labels, Type

    def __len__(self):
        """Return the number of file names."""
        return len(self.fnames)

    def __getitem__(self, index):
        """Load the image at ``index`` and pair it with its label or name."""
        image = imread(os.path.join(self.dir, self.fnames[index]))
        mode = self.type
        if "train" in mode:
            return train_transforms(image = image)['image'], self.lbs[index]
        if "valid" in mode:
            return valid_transforms(image = image)['image'], self.lbs[index]
        if "tr-tst" in mode:
            return image, self.lbs[index]
        if "test" in mode:
            return image, self.fnames[index]
        return None

# Augmentation

In [None]:
# ====================================================
# Augmentation
# ====================================================
# Normalisation step shared by the pipeline below: uses IMG_MEAN / IMG_STD with
# pixel values expressed on a 0-255 scale (max_pixel_value=255.0).
Aug_Norm = A.Normalize(mean=IMG_MEAN, std=IMG_STD, max_pixel_value=255.0, p=1.0)
# Test-time augmentation pipeline. Every call draws a new random variant:
# random flips, a shift/scale/rotate, a mild colour jitter (hue untouched), and
# a random HEIGHT x WIDTH crop, followed by normalisation and tensor conversion.
test_aug = Compose([
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(p = 1.0),
            A.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.2, hue=0.00, always_apply=False, p=1.0),
            A.RandomCrop(height= HEIGHT, width = WIDTH,always_apply=True, p=1.0),
            Aug_Norm,
            ToTensorV2(p=1.0)
        ], p=1.)

# Model Loading

In [None]:
# ====================================================
# Model Loading
# ====================================================
# Load the checkpoints whose position in the MODEL_PATH listing is in MODEL_LIST.
models = []
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the position-based selection reproducible between runs and machines.
for count, model_fpath in enumerate(sorted(os.listdir(MODEL_PATH))):
    if count not in MODEL_LIST:
        continue
    print("Model Loaded:",model_fpath)
    # The architecture name is the part of the file name before the first '_f'.
    model_name_split = model_fpath.split('_f')[0]
    model = CassavaNet(model_name_split,pretrained = False)
    info = torch.load(os.path.join(MODEL_PATH, model_fpath), map_location=DEVICE)
    model.load_state_dict(info)
    models.append(model)

In [None]:
# One row per file found in the test directory; labels are filled in later.
list_files = os.listdir(TEST_DIR)
submission = pd.DataFrame({'image_id': pd.Series(list_files)})
submission.head()

# TTA

In [None]:
# ====================================================
# TTA
# ====================================================


In [None]:
def get_name(file_path):
    """Return the last path component of ``file_path`` (a string tensor)."""
    return tf.strings.split(file_path, os.path.sep)[-1]

# Decoding
def decode_image(image_data):
    """Decode JPEG bytes into a 3-channel float32 image scaled by 1/255.

    ``center_crop`` is not applied here.
    """
    decoded = tf.image.decode_jpeg(image_data, channels=3)  # force three channels
    return tf.cast(decoded, tf.float32) / 255.0  # rescale pixel values


def center_crop(image):
    """Crop the centred square of a 600x800 image and resize it to HEIGHT x WIDTH."""
    image = tf.reshape(image, [600, 800, CHANNELS])  # original image size

    rows, cols = image.shape[0], image.shape[1]
    # The square's side is the shorter edge; only the longer edge gets an offset.
    side = min(rows, cols)
    top = (rows - cols) // 2 if rows > cols else 0
    left = 0 if rows > cols else (cols - rows) // 2
    square = tf.image.crop_to_bounding_box(image, top, left, side, side)

    return tf.image.resize(square, [HEIGHT, WIDTH])  # expected model input shape

def resize_image(image, label):
    """Resize ``image`` to HEIGHT x WIDTH with a static shape; pass ``label`` through."""
    target_hw = [HEIGHT, WIDTH]
    resized = tf.image.resize(image, target_hw)
    return tf.reshape(resized, target_hw + [CHANNELS]), label


def process_path(file_path):
    """Read and decode one image file; return ``(image, file_name)``."""
    file_name = get_name(file_path)
    raw_bytes = tf.io.read_file(file_path)
    return decode_image(raw_bytes), file_name


def get_dataset(files_path, shuffled=False, tta=False, extension='jpg'):
    """Build a batched ``tf.data`` pipeline of ``(image, file_name)`` pairs.

    Args:
        files_path: Directory prefix; files matching ``{files_path}*{extension}``
            are listed.
        shuffled: Whether the file listing is shuffled.
        tta: When True, ``data_augment`` is applied to every image before the
            resize (test-time augmentation). When False the images are only
            decoded and resized.
        extension: File-name suffix to match.

    Returns:
        A dataset batched by ``BATCH_SIZE`` and prefetched.
    """
    dataset = tf.data.Dataset.list_files(f'{files_path}*{extension}', shuffle=shuffled)
    dataset = dataset.map(process_path, num_parallel_calls=AUTO)
    # Honour the flag: this guard was commented out, so callers asking for
    # tta=False silently received randomly augmented images as well.
    if tta:
        dataset = dataset.map(data_augment, num_parallel_calls=AUTO)
    dataset = dataset.map(resize_image, num_parallel_calls=AUTO)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO)
    return dataset


def count_data_items(filenames):
    """Sum the integers found between a '-' and the following '.' in each name."""
    pattern = re.compile(r"-([0-9]*)\.")
    counts = [int(pattern.search(name).group(1)) for name in filenames]
    return np.sum(counts)

In [None]:
# Augmentation (some of it taken from the first notebook)
def data_augment(image, label):
    """Apply a random chain of flips, rotations, colour jitter and crops.

    Args:
        image: Image tensor to augment.
        label: Value paired with the image; returned unchanged.

    Returns:
        ``(augmented_image, label)``. The spatial size of the result may differ
        from the input because of the crop step.
    """
    # Independent uniform draws in [0, 1) that gate each group of transforms.
    p_spatial = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
            
    # Flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if p_spatial > .75:
        image = tf.image.transpose(image)
        
    # Rotations by a multiple of 90 degrees (no rotation when p_rotate <= .25)
    if p_rotate > .75:
        image = tf.image.rot90(image, k=3) # rotate 270º
    elif p_rotate > .5:
        image = tf.image.rot90(image, k=2) # rotate 180º
    elif p_rotate > .25:
        image = tf.image.rot90(image, k=1) # rotate 90º
        
    # Pixel-level transforms
    if p_pixel_1 >= .4:
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if p_pixel_2 >= .4:
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if p_pixel_3 >= .4:
        image = tf.image.random_brightness(image, max_delta=.1)
        
    # Crops: a central crop of 70/80/90 % when p_crop > .7, a random square
    # crop with side in [int(HEIGHT*.8), HEIGHT) when .4 < p_crop <= .7,
    # and no crop otherwise.
    if p_crop > .7:
        if p_crop > .9:
            image = tf.image.central_crop(image, central_fraction=.7)
        elif p_crop > .8:
            image = tf.image.central_crop(image, central_fraction=.8)
        else:
            image = tf.image.central_crop(image, central_fraction=.9)
    elif p_crop > .4:
        crop_size = tf.random.uniform([], int(HEIGHT*.8), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])

    return image, label

In [None]:
!pip install --quiet /kaggle/input/kerasapplications
!pip install --quiet /kaggle/input/efficientnet-git
import math, os, re, warnings, random, glob
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
from tensorflow.keras import Sequential, Model
import efficientnet.tfkeras as efn

def seed_everything(seed=0):
    """Seed Python, NumPy and TensorFlow and request deterministic TF ops."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)

# Seed the generators for the TensorFlow part of the notebook and silence warnings.
seed = 21
seed_everything(seed)
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf
import re
# Root of the competition data; the sample submission is read into `submission`.
database_base_path = '/kaggle/input/cassava-leaf-disease-classification/'
submission = pd.read_csv(f'{database_base_path}sample_submission.csv')
def count_data_items(filenames):
    """Sum the integers found between a '-' and the following '.' in each name."""
    pattern = re.compile(r"-([0-9]*)\.")
    counts = [int(pattern.search(name).group(1)) for name in filenames]
    return np.sum(counts)
# List the test TFRecord shards and total the per-file counts encoded in their names.
TEST_FILENAMES = tf.io.gfile.glob(f'{database_base_path}test_tfrecords/ld_test*.tfrec')
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)

In [None]:
import glob
# Keras weight files (.h5), in sorted order, printed one per line.
model_path_list = sorted(glob.glob('/kaggle/input/cassava-leaf-disease-training-with-tpu-v2-pods/*.h5'))
print(*model_path_list, sep='\n')

In [None]:
# PyTorch ensemble inference with test-time augmentation.
start_time = time.time()
BATCH_SIZE = 16  # also read by get_dataset() for the TensorFlow pipeline
test_set = GetData(TEST_DIR,submission['image_id'], submission['label'], Type = 'test')
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8,pin_memory = True)

# Device placement and eval mode do not change between batches, so set them once.
for model in models:
    model.to(DEVICE)
    model.eval()

with torch.no_grad():
    for images, image_ids in test_loader:
        # Handle every image of the batch on its own. Reshaping the whole batch
        # to a single (600, 800, C) image only worked for batches of one, and
        # only the first file name of each batch was ever written back.
        for image, image_id in zip(images, image_ids):
            img_np = image.numpy()
            # N_TTA independently augmented views of the same image.
            aug_images = torch.stack(
                [test_aug(image=img_np)['image'] for _ in range(N_TTA)]
            ).to(torch.float32).to(DEVICE)

            voting = np.zeros((len(models), N_TTA, N_CLASSES))
            for model_no, model in enumerate(models):
                logits = model(aug_images)
                # Class probabilities live on dim 1 of the (N_TTA, N_CLASSES) logits.
                voting[model_no, :, :] = F.softmax(logits, dim=1).cpu().numpy()

            # Average over the TTA views first, then over the models.
            voting = np.sum(voting, axis=1) / N_TTA
            voting = np.sum(voting, axis=0) / len(models)

            label = np.argmax(voting)
            # A single .loc call writes into the frame itself; the chained form
            # submission['label'].loc[...] may assign into a temporary copy.
            submission.loc[submission['image_id'] == image_id, 'label'] = label
print(time.time()-start_time)
# `voting` holds the averaged class probabilities of the last processed image.
print(voting)

In [None]:
import math, os, re, warnings, random, glob
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
from tensorflow.keras import Sequential, Model
import efficientnet.tfkeras as efn

In [None]:

def model_fn(input_shape, N_CLASSES):
    """Build an EfficientNetB4 classifier with a dropout + dense output.

    Args:
        input_shape: Shape passed to the Keras input layer.
        N_CLASSES: Number of units of the output layer.

    Returns:
        An uninitialised-weights Keras ``Model`` mapping the input image to the
        ``output`` layer.
    """
    image_input = L.Input(shape=input_shape, name='input_image')
    # EfficientNet backbone without its top, no preloaded weights, average-pooled.
    backbone = efn.EfficientNetB4(
        input_tensor=image_input,
        include_top=False,
        weights=None,
        pooling='avg',
    )

    features = L.Dropout(0.4)(backbone.output)
    scores = L.Dense(N_CLASSES, activation='tanh', name='output')(features)
    return Model(inputs=image_input, outputs=scores)


# Build the Keras network once with unconstrained height/width; this rebinds
# the global name `model`.
model = model_fn((None, None, CHANNELS), N_CLASSES)
    

In [None]:
# TPU detection / distribution-strategy setup, currently disabled. Only the
# tf.data autotune constant below is active.
# try:
#     tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
#     print(f'Running on TPU {tpu.master()}')
# except ValueError:
#     tpu = None

# if tpu:
#     tf.config.experimental_connect_to_cluster(tpu)
#     tf.tpu.experimental.initialize_tpu_system(tpu)
#     strategy = tf.distribute.experimental.TPUStrategy(tpu)
# else:
#     strategy = tf.distribute.get_strategy()

# Passed as num_parallel_calls / prefetch size by get_dataset().
AUTO = tf.data.experimental.AUTOTUNE
# REPLICAS = strategy.num_replicas_in_sync
# print(f'REPLICAS: {REPLICAS}')

In [None]:

# Keras ensemble inference: average the predictions of every checkpoint in
# model_path_list, optionally with test-time augmentation.
files_path = f'{database_base_path}test_images/'
test_size = len(os.listdir(files_path))
test_preds = np.zeros((test_size, N_CLASSES))
TTA_STEPS = 10 # Do TTA if > 0 

for model_path in model_path_list:
    print(model_path)
    K.clear_session()
    model.load_weights(model_path)

    if TTA_STEPS > 0:
        test_ds = get_dataset(files_path, tta=True).repeat()
        # One pass over the test set is ceil(test_size / BATCH_SIZE) batches
        # (the last one may be partial), so TTA_STEPS passes need exactly this
        # integer number of steps. The previous expression produced a float
        # and requested surplus batches that were sliced off again.
        ct_steps = TTA_STEPS * math.ceil(test_size / BATCH_SIZE)
        preds = model.predict(test_ds, steps=ct_steps, verbose=1)[:(test_size * TTA_STEPS)]
        # Rows are ordered pass by pass; the Fortran-order reshape regroups them
        # as (image, pass, class) before averaging over the passes.
        preds = np.mean(preds.reshape(test_size, TTA_STEPS, N_CLASSES, order='F'), axis=1)
        test_preds += preds / len(model_path_list)
    else:
        test_ds = get_dataset(files_path, tta=False)
        x_test = test_ds.map(lambda image, image_name: image)
        test_preds += model.predict(x_test) / len(model_path_list)

# Recover the file names in the same (unshuffled) order as the predictions.
test_names_ds = get_dataset(files_path)
image_names = [img_name.numpy().decode('utf-8') for _, img_name in test_names_ds.unbatch()]
print(test_preds)

In [None]:
# Blend the Keras ensemble (`test_preds`) with the PyTorch ensemble (`voting`)
# using equal weights, then take the most likely class per row.
print(voting)
print(test_preds)
# NOTE(review): where `voting` is computed it is reduced over both the TTA and
# the model axes, leaving a 1-D vector of N_CLASSES values. np.mean(..., axis=0)
# then collapses it to a single scalar, which shifts every class score equally
# and therefore cannot influence the argmax below. Confirm the intended blend
# (per-image class probabilities are probably what was meant).
out=0.5*test_preds+0.5*np.mean(voting, axis=0)
# softmax is monotonic, so it does not change which class wins the argmax.
out = softmax(out).argmax(axis=-1)
print(out)

In [None]:
# Assemble the final predictions (file name -> class index) and write them to
# submission.csv without the index column.
submission = pd.DataFrame({'image_id': image_names, 'label': out})
submission.to_csv('submission.csv', index=False)
display(submission.head())