Based on http://arxiv.org/abs/1512.04150
"Learning Deep Features for Discriminative Localization" by Zhou et al.

and

Based on https://arxiv.org/abs/2012.04846
"SnapMix - Semantically Proportional Mixing for Augmentation" by Huang et al.

# # Using ResNet50

In [None]:
import cv2
import pandas as pd
import numpy as np
import os
from pathlib import Path
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow.keras.preprocessing.image

# Locations of the competition data; the directories are kept as plain strings:
base_path = Path('../input/cassava-leaf-disease-classification')
train_directory = str(base_path / 'train_images')
test_directory = str(base_path / 'test_images')

# File names found in the two image directories:
train_images = os.listdir(train_directory)
test_images = os.listdir(test_directory)

# Training table read from train.csv:
data_df = pd.read_csv(str(base_path / 'train.csv'))

In [None]:
# Input parameters shared by the SnapMix pipeline.

# Number of samples per batch:
batch_size = 32
# Both image sides are 256 pixels:
image_width = image_height = 256
# Shape of a single model input; the trailing 3 is the channel axis:
input_shape = (image_width, image_height, 3)

In [None]:

def snapmix_batch_loss(is_augmented, label_batch, y_pred, label_batch2 = None, box_weights1 = None, box_weights2 = None):
    """
    Batch loss used during SnapMix training.

    Without augmentation this is the mean sparse-categorical-crossentropy of the batch.
    With augmentation the per-sample crossentropy against the original labels is weighted
    by (1 - box_weights1), the crossentropy against the patched-in labels is weighted by
    box_weights2, and the mean of their sum is returned.

    Args:
        is_augmented (bool) : True to use the SnapMix loss, False for the plain loss
        label_batch : true labels
        y_pred : predicted class probabilities
        label_batch2 : labels of the patched-in images (only read when is_augmented)
        box_weights1 : semantic box weights of the patched-into images (only read when is_augmented)
        box_weights2 : semantic box weights of the patched-in images (only read when is_augmented)

    Returns:
        SnapMix loss or sparse-categorical-crossentropy loss, reduced to its batch mean
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy

    # Guard clause: the plain, un-mixed loss.
    if not is_augmented:
        return tf.math.reduce_mean(crossentropy(label_batch, y_pred))

    loss_original = crossentropy(label_batch, y_pred)
    loss_patched = crossentropy(label_batch2, y_pred)

    weighted_original = tf.math.multiply(loss_original, (1 - box_weights1))
    weighted_patched = tf.math.multiply(loss_patched, box_weights2)

    return tf.math.reduce_mean(weighted_original + weighted_patched, axis=0)


def snapmix_batch_augmentation(class_activation_model, model, img_batch, label_batch, output_layer_name, alpha=0.2):
    """
    Applies the SnapMix-augmentation to the images and labels within a data batch with respect to a model.

    One box is drawn for the cut-out region (box1) and one for the patch source (box2); both
    are shared by the whole batch. For every image the content of box2 of a randomly chosen
    partner image is resized to the size of box1 and pasted there. Boxes have inclusive
    corners (see random_box), and the same inclusive region is used for the semantic weights,
    the crop, the paste and the fallback area.

    Args:
        class_activation_model (model) : model whose predict() output provides the feature maps
            for the class activation maps (passed on to batch_semantic_percentage_map)
        model (model) : full classifier; only the weights of its layer `output_layer_name` are read
        img_batch (tf.tensor) : batch with images, all the same shape
        label_batch (tf.tensor) : batch with labels for the images (converted with .numpy())
        output_layer_name (string) : name of the final output-layer
        alpha (float), optional: parameter for beta-distribution generating image shrinking-factor for box-area

    Returns:
        augmented_images : the augmented input-images (numpy array, the input batch is not modified)
        label_batch2 : the labels of the images that have been patched into the input-images
        box_weights1 : batch of semantic weights of cut-out-boxes
        box_weights2 : batch of semantic weights of patched-in-boxes
    """
    batch_size = img_batch.shape[0]
    img_width = img_batch.shape[1]
    img_height = img_batch.shape[2]

    # get_weights() returns (weights, biases); only the weight matrix is needed:
    classificator_weights = model.get_layer(output_layer_name).get_weights()[0]

    box1 = random_box(img_width, img_height, alpha=alpha)
    box2 = random_box(img_width, img_height, alpha=alpha)

    # Pair every image with a randomly chosen partner from the same batch:
    rng = np.random.default_rng()
    permutation = rng.permutation(batch_size)
    label_batch = label_batch.numpy().astype(int)

    # np.copy yields a writable array, so the caller's batch stays untouched;
    # fancy indexing below creates independent copies for the partner batch.
    augmented_images = np.copy(img_batch)
    img_batch2 = augmented_images[permutation]
    label_batch2 = label_batch[permutation]

    # Semantic percentage maps of the originals; the partners' maps are a reordering:
    SPM1 = batch_semantic_percentage_map(
        class_activation_model=class_activation_model,
        classificator_weights=classificator_weights,
        img_batch=img_batch,
        label_batch=label_batch)
    SPM2 = SPM1[permutation, :, :]

    x11, y11, x12, y12 = box1
    x21, y21, x22, y22 = box2

    # Box corners are inclusive, hence the "+ 1" on every upper bound:
    box1_rows, box1_cols = x12 - x11 + 1, y12 - y11 + 1
    box2_rows, box2_cols = x22 - x21 + 1, y22 - y21 + 1

    box_weights1 = np.sum(SPM1[:, x11:(x12 + 1), y11:(y12 + 1)], axis=(1, 2))
    box_weights2 = np.sum(SPM2[:, x21:(x22 + 1), y21:(y22 + 1)], axis=(1, 2))

    # Normalization for patching with equal labels:
    same_label = label_batch == label_batch2
    tmp = np.copy(box_weights1)
    box_weights1[same_label] += box_weights2[same_label]
    box_weights2[same_label] += tmp[same_label]

    # Fall back to the relative box area where the semantic weights are not well defined:
    rel_area1 = box1_rows * box1_cols / (img_width * img_height)
    rel_area2 = box2_rows * box2_cols / (img_width * img_height)
    box_weights1[np.isnan(box_weights1)] = rel_area1
    box_weights2[np.isnan(box_weights2)] = rel_area2

    # Crop box2 from the partner images, resize to box1 and paste:
    cropped = img_batch2[:, x21:(x22 + 1), y21:(y22 + 1), :]
    resized_cropped = np.zeros((cropped.shape[0], box1_rows, box1_cols, cropped.shape[3]))
    for i in range(batch_size):
        # cv2.resize expects dsize as (columns, rows)
        resized_cropped[i] = cv2.resize(cropped[i], (box1_cols, box1_rows), interpolation=cv2.INTER_CUBIC)
    augmented_images[:, x11:(x12 + 1), y11:(y12 + 1)] = resized_cropped

    return augmented_images, label_batch2, box_weights1, box_weights2


def batch_semantic_percentage_map(class_activation_model, classificator_weights, img_batch, label_batch):
    """
    Calculates the SPM - Semantic Percentage Map of a batch of images.

    For every image the class activation map (CAM) of its own label is computed from the
    feature maps, upsampled to the image size, shifted so that its minimum is zero and
    divided by its sum. Normalization is done per image, so every map sums to
    (approximately) one and a box-sum over it is that image's semantic share.

    Args:
        class_activation_model : the part of the model to calculate the class-activations from (the part before the classifier)
        classificator_weights : the weights of the last layer of the classifier, i.e. for a softmax-layer:
            classificator_weights = model.get_layer("SoftMaxLayerName").get_weights()[0]
        img_batch : batch of images; axes 1 and 2 are taken as the spatial size of the maps
        label_batch : integer labels, used to pick each image's column of classificator_weights

    Returns:
        the SPMs (Semantic Percentage Maps) for a batch of images, shape
        (batch, img_batch.shape[1], img_batch.shape[2]).
    """
    feature_maps_batch = class_activation_model.predict(img_batch)

    # Calculate Class Activation Map (CAM):
    batch_size = feature_maps_batch.shape[0]
    feature_map_width = feature_maps_batch.shape[1]
    feature_map_height = feature_maps_batch.shape[2]
    CAM_batch = np.zeros((batch_size, feature_map_width, feature_map_height))
    # column i holds the classifier weights belonging to the label of image i
    clw_matrix = classificator_weights[:, label_batch]
    for i in range(batch_size):
        CAM_batch[i, :, :] = np.tensordot(clw_matrix[:, i], feature_maps_batch[i, :, :, :], axes=([0], [2]))

    # Upsample every CAM to the size of the image:
    n_rows = img_batch.shape[1]
    n_cols = img_batch.shape[2]
    resized_CAM_batch = np.zeros((batch_size, n_rows, n_cols))
    for i in range(batch_size):
        # cv2.resize expects dsize as (columns, rows); the result has shape (n_rows, n_cols)
        resized_CAM_batch[i, :, :] = cv2.resize(CAM_batch[i, :, :], (n_cols, n_rows), interpolation=cv2.INTER_CUBIC)

    # Per-image normalization: non-negative maps that each sum to one.
    # The small constant avoids a division by zero for a constant map.
    resized_CAM_batch -= resized_CAM_batch.min(axis=(1, 2), keepdims=True)
    resized_CAM_batch /= resized_CAM_batch.sum(axis=(1, 2), keepdims=True) + 1e-8

    return resized_CAM_batch


def random_box(im_width, im_height, alpha, minimal_width=2, minimal_height=2, rng=None):
    """
    Returns a random box=(x1, y1, x2, y2) that lies inside an image of size im_width x im_height.

    The corners are inclusive: 0 <= x1 <= x2 <= im_width - 1 and 0 <= y1 <= y2 <= im_height - 1,
    so the box spans (x2 - x1 + 1) x (y2 - y1 + 1) pixels. Width and height are each a random
    integer below the image size, scaled by sqrt(lambda_img), where lambda_img is drawn from
    beta(alpha, alpha); draws are repeated until the box is at least
    minimal_width x minimal_height.

    Args:
        im_width (int) : image extent along the first box axis
        im_height (int) : image extent along the second box axis
        alpha (float) : parameter of the beta-distribution
        minimal_width (int), optional : smallest accepted box width
        minimal_height (int), optional : smallest accepted box height
        rng (numpy.random.Generator), optional : random generator to use; a fresh
            unseeded generator is created when omitted

    Returns:
        tuple (x1, y1, x2, y2)

    Raises:
        ValueError : if the image is too small to ever contain a box of the minimal size
    """
    # The scaled size is strictly smaller than the drawn integer (sqrt(lambda) < 1) and the
    # drawn integer is at most size - 1, so a positive minimum needs size - 1 > minimum.
    # Without this check the sampling loop below would never terminate.
    for axis_name, size, minimum in (("width", im_width, minimal_width),
                                     ("height", im_height, minimal_height)):
        if minimum > 0 and size - 1 <= minimum:
            raise ValueError(
                "image {} {} is too small for a minimal box {} of {}".format(
                    axis_name, size, axis_name, minimum))

    if rng is None:
        rng = np.random.default_rng()

    # Start with values that fail the acceptance test so the loop runs at least once:
    random_width = im_width + 1
    random_height = 0

    while random_width > im_width or random_height > im_height or random_height < minimal_height or \
            random_width < minimal_width:
        lambda_img = rng.beta(alpha, alpha)
        # beta() may return exactly 0 or 1 in floating point; redraw in that case
        if 0 < lambda_img < 1:
            random_width = int(rng.integers(minimal_width, im_width) * np.sqrt(lambda_img) // 1)
            random_height = int(rng.integers(minimal_height, im_height) * np.sqrt(lambda_img) // 1)

    # Place the box so that it fits completely into the image:
    left_upper_x = rng.integers(0, im_width - random_width, endpoint=True)
    left_upper_y = rng.integers(0, im_height - random_height, endpoint=True)

    box = (left_upper_x,
           left_upper_y,
           left_upper_x + random_width - 1,
           left_upper_y + random_height - 1)

    return box


In [None]:
# Define Data Generators as this DataSequence-Class:

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import Sequence
import math
import random
import numpy as np


class DataSequence(Sequence):
    """
    Keras Sequence object reading data-files (images) from a directory, while file-names and labels are provided by a data-frame.
    Providing data-label pairs in batches.
    """

    def __init__(self, df, image_path, batch_size, img_size, shuffle=True, preprocessing_function = None):
        """
        Initialization
        Args:
            df (pandas data-frame) : to be read from; column 0 holds the image names, column 1 the labels
            image_path (string): path to images location (directory)
            batch_size (int): batch size at each iteration
            img_size (list): image-size, ex. [28, 28]
            shuffle (bool): True to shuffle the image/label pairs in on_epoch_end()
            preprocessing_function: optional callable applied to every resized image;
                the identity is used when omitted

        Batches returned by indexing:
            batch of images - (batch_size, img_size[0], img_size[1], channels)  resized to img_size and rescaled with 1./255,
            batch of labels - (batch_size)
        """
        super().__init__()
        self.df = df
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        # Generator used for the epoch-end shuffle:
        self._rng = np.random.default_rng()
        if preprocessing_function:
            self.preprocessing_function = preprocessing_function
        else:
            self.preprocessing_function = lambda x: x

        # Take labels and a list of image locations in memory:
        self.label_column = df.columns[1]
        self.image_column = df.columns[0]
        self.labels = self.df[self.label_column].values
        self.im_list = self.df[self.image_column].apply(lambda x: os.path.join(image_path, x)).tolist()

    def __len__(self):
        """returns the number of batches, counting a final partial batch"""
        return int(math.ceil(len(self.df) / float(self.batch_size)))

    def on_epoch_end(self):
        """Reorders images and labels with one common permutation when shuffle is set."""
        if not self.shuffle:
            return
        # A single permutation for both containers keeps every image with its label.
        order = self._rng.permutation(len(self.im_list))
        self.labels = self.labels[order]
        self.im_list = [self.im_list[i] for i in order]

    def _batch_slice(self, idx):
        """Index range of batch idx; slicing clamps it for the final partial batch."""
        start = idx * self.batch_size
        return slice(start, start + self.batch_size)

    def _load_image(self, path):
        """Loads one image, rescales it with 1./255, resizes it and applies the preprocessing."""
        pixels = tf.keras.preprocessing.image.img_to_array(load_img(path)) * 1. / 255
        return self.preprocessing_function(tf.image.resize(pixels, size=self.img_size))

    def get_batch_labels(self, idx):
        """Fetch a batch, or what is left, of labels."""
        return self.labels[self._batch_slice(idx)]

    def get_batch_features(self, idx):
        """Fetch a batch, or what is left, of preprocessed images as a list."""
        return [self._load_image(im) for im in self.im_list[self._batch_slice(idx)]]

    def __getitem__(self, idx):
        """Returns the (images, labels) tensors of batch idx."""
        batch_images = tf.stack(self.get_batch_features(idx), axis=0)
        batch_labels = tf.stack(self.get_batch_labels(idx), axis=0)

        return batch_images, batch_labels



# Load the pre-trained ResNet50 weights from the added dataset "tf-keras-resnet":

https://www.kaggle.com/xhlulu/tf-keras-resnet

(Other weight files did not match the expected format.)


In [None]:
from tensorflow.keras.applications import ResNet50

# ResNet50 without its classification head, initialised from the local weight file:
conv_base = ResNet50(
    include_top=False,
    weights="../input/tf-keras-resnet/resnet50_notop.h5",
    input_shape=input_shape,
)

# Freeze the convolutional base and print its layers:
conv_base.trainable = False
conv_base.summary()

In [None]:
# Define the Conv Base Model and the class_activation_model: the pre-trained model without
# its classifier part, i.e. the model the class activations are read from.
# Instead of the 1024-filter Conv2D layer (kept below as a commented-out alternative),
# a 512-filter Conv2D layer named "HighResolutionLayer" is put on top of the conv base.

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Conv2D

# Conv Base Model:

# Keep all layers of conv_base up to and including its fourth-last layer
# (presumably the last convolution - verify against conv_base.summary()):
conv_base_model = Model(conv_base.input, conv_base.layers[-4].output, name="ConvBaseModel")

#for layer in conv_base_model.layers:
#    layer.trainable = True
    
# Input tensor of the class activation model:
model_input = tf.keras.Input(input_shape)

# Additional trainable Conv2D layer on top of the conv base (the research paper's
# variant with 1024 filters is the commented-out line):
#model_output = Conv2D(1024, (3,3), padding="same", trainable=True, name="HighResolutionLayer")(conv_base_model(model_input)) 
model_output = Conv2D(512, (3,3), padding="same", trainable=True, name="HighResolutionLayer")(conv_base_model(model_input)) 

# To run without the additional Conv2D layer, uncomment the following line:
#model_output = conv_base_model(model_input) 
class_activation_model = Model(inputs=[model_input], outputs=[model_output], name="ClassActivationModel")

# Print both model structures for inspection:
class_activation_model.summary()
conv_base_model.summary()

In [None]:
# Freeze/ Thaw layers of the Conv Base Model for fine-tuning:

# Set to True when fine-tuning the last convolution of the ResNet50 base:
fine_tune_last_conv = False

# "conv5_block3_3_conv" lives inside the nested conv_base_model, so it has to be looked up
# there; class_activation_model.layers only lists the input, the nested model and the
# HighResolutionLayer. Setting `trainable` on the nested model is applied to all of its
# layers, and a non-trainable container reports no trainable weights at all. The container
# is therefore switched first and the individual layers are set afterwards.
conv_base_model.trainable = fine_tune_last_conv
for layer in conv_base_model.layers:
    layer.trainable = fine_tune_last_conv and layer.name == "conv5_block3_3_conv"

# The additional convolution on top of the base is always trained:
class_activation_model.get_layer("HighResolutionLayer").trainable = True

class_activation_model.summary()

In [None]:
# Define the model "SnapMixOnResNet50Model":
# class_activation_model -> global average pooling -> 5-unit softmax classifier.

# Name of the classifier layer; snapmix_batch_augmentation reads this layer's weights by name:
output_layer_name="SoftMaxClassifier"
model_name = "SnapMixOnResNet50Model"

model_input = tf.keras.Input(input_shape) # input tensor of the full model
class_activation_output = class_activation_model(model_input) # feature maps used for the class activations
# Average every feature map over its spatial axes:
output_ = GlobalAveragePooling2D(name="GlobalAverageLayer")(class_activation_output)
# Softmax classifier with 5 output units:
model_output = Dense(5, activation="softmax", name=output_layer_name, trainable=True)(output_)

model = Model(inputs=[model_input,], outputs=[model_output,], name=model_name)
model.summary()

In [None]:
# Parameters for SnapMix training-loop:

# parameter of the beta-distribution used for drawing the boxes:
alpha = 0.20
# number of folds each epoch is split into:
kfolds= 5

# Training parameters:
## pre-training
epochs = 1 # pre-training -> learning_rate decrease and ClassActivationModel last layer training
learning_rate = 1e-3 # initial learning_rate

## training: - make last conv layer in ResNet50 trainable first!
#epochs = 4
#learning_rate = 1e-4
# probability that a batch is augmented with SnapMix:
snapmix_augmentation_probability = 0.6 # for algorithm with x% snapmix
#snapmix_augmentation_probability = 0.0 # for algorithm WITHOUT snapmix

# Optimizers, metrics and scheduler:
#optimizer = keras.optimizers.SGD(learning_rate=lr,)
#optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule,) 
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,)

# Separate accuracy metrics for training and validation:
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

# logging activities:
#log_batches = False
log_batches = False
# when log_batches is set, a log entry is printed every log_after_n_batches batches:
log_after_n_batches = 100

# minimal accuracy required to save the model while training:
min_val_acc = 0.2 # pre-training: one in five is base-line success-probability
#min_val_acc = 0.5 # fine-tuning: set value after pre-training
# name the best model is saved under:
best_model_name = "Cassava_SnapMix_ResNet50_subm2" # pre-training ResNet50

# preprocessing function if required:
#preprocessing_function = tf.keras.applications.efficientnet.preprocess_input # preproc for EfficientNetB3
#preprocessing_function = None # preproc for  VGG16 
from tensorflow.keras.applications.resnet50 import preprocess_input


def preprocessing_function(images):
    """
    ResNet50 preprocessing for images that have been rescaled with 1./255.

    DataSequence hands over images with values in [0, 1], while the ResNet50
    preprocess_input expects pixel values in [0, 255]. The rescaling is undone
    here before the ResNet50 preprocessing is applied.

    Args:
        images : image or batch of images with values in [0, 1]

    Returns:
        the images after ResNet50 preprocessing
    """
    return preprocess_input(images * 255.)

In [None]:
# define augmentation for non-snapmix augmented data:
from tensorflow import keras
from tensorflow.keras import layers

# Random horizontal flip, then random rotation, then random zoom:
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.experimental.preprocessing.RandomFlip("horizontal"))
data_augmentation.add(layers.experimental.preprocessing.RandomRotation(0.3))
data_augmentation.add(layers.experimental.preprocessing.RandomZoom(0.3))

In [None]:
# steps in the training-loop:

# accelerate the training by tensorflow graph building using @tf.function decorators:

# !!! ... unfortunately this leads to memory leaks.... !!!

#@tf.function
def train_step(is_augmented, optimizer, aug_image_batch, y_batch_train, label_batch2=None, box_weights1=None, box_weights2=None):
    """
    Runs one optimization step of the global `model` on a single batch.

    The forward pass and the loss (snapmix_batch_loss) are recorded on a gradient tape,
    the gradients are applied with `optimizer`, and the global `train_acc_metric` is
    updated with the predictions.

    Returns:
        the loss value of the batch
    """
    with tf.GradientTape() as tape:
        predictions = model(aug_image_batch, training=True)
        loss_value = snapmix_batch_loss(
            is_augmented, y_batch_train, predictions, label_batch2, box_weights1, box_weights2)

    # Back-propagate and update all trainable weights:
    weights = model.trainable_weights
    gradients = tape.gradient(loss_value, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    # Update train_acc_metric:
    train_acc_metric.update_state(y_batch_train, predictions)

    return loss_value


#@tf.function
def validation_step(val_dataset):
    """
    Feeds every batch of `val_dataset` through the global `model` in inference mode
    and accumulates the results in the global `val_acc_metric`.
    """
    for images, labels in val_dataset:
        predictions = model(images, training=False)
        # Update val_acc_metric:
        val_acc_metric.update_state(labels, predictions)
        

def save_best_model(val_acc, model_name, min_val_acc=0):
    """
    Saves the global `model` when `val_acc` beats `min_val_acc` by more than 0.01.

    Args:
        val_acc : validation accuracy just reached
        model_name (string) : name the model is saved under (inside the working directory)
        min_val_acc : best accuracy so far

    Returns:
        the accuracy to beat next time: `val_acc` if the model was saved, else `min_val_acc`
    """
    improved = val_acc > min_val_acc + 0.01  # improvement of more than one percent
    if not improved:
        return min_val_acc

    model.save("./"+ model_name)
    print("Model saved to {}".format(model_name))
    return val_acc


def reduceLROnPlateau(learning_rate):
    """
    Divides the learning rate by ten and builds a new Adam optimizer for it.

    The reduction is unconditional: no plateau is detected here.

    Returns:
        (new Adam optimizer, reduced learning rate)
    """
    reduced_rate = learning_rate * 0.1
    new_optimizer = tf.keras.optimizers.Adam(learning_rate=reduced_rate,)
    return new_optimizer, reduced_rate


def kFold(data_df, fold, k=1):
    """
    Splits a data-frame into the training and the validation part of one fold.

    The rows are cut into k consecutive folds of len(data_df) // k rows each; the last
    fold additionally takes the remaining rows, so every row is part of exactly one
    validation fold.

    Args:
        data_df (pandas data-frame) : data to split
        fold (int) : index of the fold used for validation, 0 <= fold < k
        k (int), optional : number of folds

    Returns:
        train_df, val_df : all rows outside the fold, and the rows of the fold

    Raises:
        ValueError : if k < 1 or fold is not in range(k)
    """
    if k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))
    if not 0 <= fold < k:
        raise ValueError("fold must be in range({}), got {}".format(k, fold))

    n_rows = data_df.shape[0]
    fold_length = n_rows // k
    start = fold * fold_length
    # the last fold absorbs the rows left over by the integer division
    stop = n_rows if fold == k - 1 else start + fold_length

    val_df = data_df.iloc[start:stop]
    train_df = pd.concat([data_df.iloc[:start], data_df.iloc[stop:]], axis=0)

    return train_df, val_df


def find_batch_size(number_of_samples, min_batch_size, max_batch_size):
    """
    Finds the smallest batch_size between a min and a max batch-size dividing a number of samples
    without remainder (if possible). If the returned rest is not zero, no batch-size within
    the bounds could be found; the largest batch-size tried is returned in that case.
    Example: find_batch_size(number, min, number) finds a batch size in any case.

    :param number_of_samples: number of samples to be divided into batches
    :param min_batch_size: minimal desired number of samples in one batch (at least 1)
    :param max_batch_size: maximal desired number of samples in one batch
    :return: batch_size, steps (integer number of batch-iterations needed to cover all samples),
        rest (if not zero, no batch_size could be found)
    :raises ValueError: if min_batch_size is smaller than 1
    """
    if min_batch_size < 1:
        raise ValueError("min_batch_size must be at least 1, got {}".format(min_batch_size))

    batch_size = min_batch_size
    rest = number_of_samples % batch_size
    # strictly smaller: the candidate must never grow beyond max_batch_size
    while rest != 0 and batch_size < max_batch_size:
        batch_size += 1
        rest = number_of_samples % batch_size

    # integer ceiling division, so a trailing partial batch is counted as a step
    steps = -(-number_of_samples // batch_size)
    return batch_size, steps, rest

In [None]:
# SnapMix training-loop: #---from here #---to here
# Code For Kaggle Submission: move to separate cell after submission

#---from here
import time

# collect training-/ evaluation-results:
fold_val_accuracies = []  # validation accuracy of every fold of every epoch
val_accuracies = []       # mean validation accuracy of each epoch
batch_losses = []         # loss of every training batch

rng = np.random.default_rng()

for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()
    # Fold accuracies of this epoch only, so the epoch average is not
    # diluted by the results of earlier epochs:
    epoch_fold_accuracies = []

    # shuffle dataframe:
    data_df = data_df.sample(frac=1)

    for fold in range(kfolds):
        train_df, val_df = kFold(data_df, fold=fold, k=kfolds)
        # Instantiate the data generators:
        data_train = DataSequence(df=train_df,
                                  image_path=train_directory,
                                  img_size=[image_width, image_height],
                                  batch_size=batch_size,
                                  preprocessing_function=preprocessing_function,)
        data_val = DataSequence(df=val_df,
                                image_path=train_directory,
                                img_size=[image_width, image_height],
                                batch_size=batch_size,
                                preprocessing_function=preprocessing_function,)

        # Iterate over the batches of the dataset:
        for step, (x_batch_train, y_batch_train) in enumerate(data_train):
            if rng.uniform() < snapmix_augmentation_probability:
                is_augmented = True
                aug_image_batch, label_batch2, box_weights1, box_weights2 = snapmix_batch_augmentation(
                    class_activation_model=class_activation_model,
                    model=model,
                    img_batch=x_batch_train,
                    label_batch=y_batch_train,
                    output_layer_name=output_layer_name,
                    alpha=alpha)
            else:
                is_augmented = False
                label_batch2, box_weights1, box_weights2 = None, None, None
                if rng.uniform() < 0.5:  # augment half of the time
                    # training=True is passed explicitly so the random layers are
                    # active regardless of the Keras learning phase:
                    aug_image_batch = data_augmentation(x_batch_train, training=True)
                else:
                    aug_image_batch = x_batch_train

            # TODO: as a @tf.function this causes a memory leak...:
            loss_value = train_step(is_augmented, optimizer, aug_image_batch, y_batch_train,
                                    label_batch2, box_weights1, box_weights2)
            batch_losses.append(float(loss_value))

            # Log every log_after_n_batches batches.
            if log_batches and step % log_after_n_batches == 0:
                print("Samples seen : %d samples" % ((step + 1) * batch_size))
                print("Epoch: {0}, Fold: {1}".format(epoch, fold))
                print("Training loss (for one batch) at step %d: %.4f" % (step, float(loss_value)))
                # Display metrics at the end of each batch cycle:
                print("Training acc over batch-cycle: %.4f" % (float(train_acc_metric.result()),))

        # Run a validation loop at the end of each fold:
        validation_step(data_val)  # TODO: as a @tf.function this causes a memory leak...
        fold_val_acc = float(val_acc_metric.result())
        fold_val_accuracies.append(fold_val_acc)
        epoch_fold_accuracies.append(fold_val_acc)
        val_acc_metric.reset_states()

    optimizer, learning_rate = reduceLROnPlateau(learning_rate)

    # average validation accuracy over the folds of this epoch:
    epoch_val_accuracy = np.mean(epoch_fold_accuracies)
    val_accuracies.append(float(epoch_val_accuracy))
    # Display metrics at the end of each epoch:
    print("Epoch: {}".format(epoch))
    print("Training acc over epoch: %.4f" % (float(train_acc_metric.result())))
    print("Evaluation acc over epoch: %.4f" % (float(epoch_val_accuracy)))
    # Reset training metrics at the end of each epoch:
    train_acc_metric.reset_states()
    min_val_acc = save_best_model(epoch_val_accuracy, best_model_name, min_val_acc=min_val_acc)
    print("Time taken: %.2fs" % (time.time() - start_time))

#---to here



In [None]:
# Code For Kaggle Submission:

# load best performing model:
model = keras.models.load_model("./"+ best_model_name)

# load data from test directory, predict and write csv-file for submission:
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator


image_width = 256
image_height = 256
batch_size = 32

base_path = Path('../input/cassava-leaf-disease-classification')
test_directory = os.path.join(base_path,'test_images')
sample_submission_df = pd.read_csv(os.path.join(base_path, "sample_submission.csv"))


def _preprocess_test_image(image):
    """
    Applies the same per-image pipeline as DataSequence does during training:
    rescale with 1./255, then the configured preprocessing_function (if any).
    """
    image = image / 255.
    if preprocessing_function is not None:
        image = np.asarray(preprocessing_function(image))
    return image


# The rescaling is part of _preprocess_test_image, because ImageDataGenerator applies
# its own `rescale` only after the preprocessing function, whereas training rescales first.
test_generator_factory = ImageDataGenerator(preprocessing_function=_preprocess_test_image)

test_data_generator = test_generator_factory.flow_from_dataframe(
    dataframe=sample_submission_df,
    directory=test_directory,
    x_col='image_id',
    seed=42,
    target_size = (image_width, image_height),
    class_mode=None,
    interpolation='bilinear',
    # no shuffling: predictions must come in the order of test_data_generator.filenames
    shuffle=False,
    batch_size=batch_size,
)

# one step per batch of the generator, including a final partial batch:
step_size_test = len(test_data_generator)

test_data_generator.reset()

# the following needs to be fixed: DataSequence relies on having images=df[0] AND labels=df[1]
# which is not the case for test data
#test_dataset = DataSequence(df=sample_submission_df,
#                            image_path = test_directory,
#                            img_size=[image_width, image_height],
#                            batch_size=batch_size,
#                            preprocessing_function=preprocessing_function,)

predictions_=model.predict(
    test_data_generator,
    steps=step_size_test,
    verbose=1)

# predicted class = index of the highest softmax output:
predictions=np.argmax(predictions_,axis=1)
image_ids=test_data_generator.filenames
#image_ids = test_dataset.im_list # code, when DataSequence is used

submission_df=pd.DataFrame({"image_id":image_ids, "label":predictions})
submission_df.to_csv("submission.csv",index=False)