In [1]:
!pip install albumentations



In [2]:
import os

import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt

%matplotlib inline
import sklearn
import tensorflow as tf
import tensorflow_addons as tfa
from PIL import Image
from sklearn.model_selection import (
    StratifiedKFold,
    StratifiedShuffleSplit,
    train_test_split,
)
from tensorflow import keras
from tensorflow.keras.applications.efficientnet import EfficientNetB4, EfficientNetB7
from tensorflow.keras.layers import (
    AveragePooling2D,
    AvgPool2D,
    Conv2D,
    Dense,
    Flatten,
    GlobalAveragePooling2D,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import notebook, tqdm
import albumentations

In [3]:
PATH = "/app/_data/"
BATCH_SIZE = 32
SEED = 42
IMAGE_SIZE = 380
NUM_CLASSES = 6
TRAIN_IMG_PATH = "/app/_data/380_npy/"
TEST_IMG_PATH = "/app/_data/test_images/"
feature_columns = ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']

In [4]:
wrong = ['ead085dfac287263.jpg', '95276ccd226ad933.jpg', 'da8770e819d2696d.jpg']
df_labels = pd.read_csv('../_data/df_csv/labels_21_20.csv', index_col=[0])
df_labels = df_labels[['image', 'labels']]
test_20_labeled = pd.read_csv('../_data/df_csv/test20_labeled.csv', index_col=[0])
# new_images = pd.read_csv('/app/_data/df_csv/new_images1.csv')
# complex_labeled = pd.read_csv('/app/_data/df_csv/complex_labels.csv')
duplicates_labeled = pd.read_csv('/app/_data/df_csv/duplicates_labeled.csv')
duplicates_labeled = duplicates_labeled.dropna(axis=0).reset_index(drop=True)

In [5]:
df_labels = pd.concat([df_labels, test_20_labeled[['image','labels']]], axis=0, ignore_index = True)
df_labels = df_labels.query('image not in @wrong').reset_index(drop=True)
list_dup = duplicates_labeled['image'].tolist()

In [6]:
df_labels = df_labels.query('image not in @list_dup').reset_index(drop=True)
df_labels = pd.concat([df_labels,duplicates_labeled], axis=0, ignore_index = True)
df_labels = df_labels.join(df_labels['labels'].str.get_dummies(' '))
df_labels['image'] = df_labels['image'].str.replace('.jpg', '.npy')
df_labels = df_labels.sample(frac=1, random_state=SEED).reset_index(drop=True)
df_complex = df_labels.query('labels == "complex"').reset_index(drop=True)
df_without_complex = df_labels.query('labels != "complex"').reset_index(drop=True)
# df_labels = pd.concat([df_without_complex,complex_labeled], axis=0, ignore_index = True)


In [7]:
df_without_complex['image']

0        ae94c05ac772f623.npy
1               Train_623.npy
2        c1c61c8270af577d.npy
3        aaa525d257129f4b.npy
4        e287daa8d6b49a45.npy
                 ...         
19664    cfaab2997580eea0.npy
19665    d43871073fa3d217.npy
19666    a99fd5a64e6e2860.npy
19667    85ca3c8fa0b4dfd0.npy
19668    f08f70f59452da26.npy
Name: image, Length: 19669, dtype: object

In [8]:
# for 380*380
transform = albumentations.Compose(
    [
        albumentations.CLAHE(p=0.1, clip_limit=(1, 2), tile_grid_size=(8, 8)),
        albumentations.OneOf(
            [
                albumentations.MotionBlur((3, 3)),
                albumentations.MedianBlur(blur_limit=3),
                albumentations.GaussianBlur(
                    blur_limit=(3, 3), sigma_limit=0),
                albumentations.Blur(blur_limit=(3, 3)),
            ],
            p=0.2,
        ),
        albumentations.OneOf(
            [
                albumentations.GaussNoise(var_limit=[10, 50], mean=1),
                albumentations.ISONoise(intensity=(0.1, 1), color_shift=(0.01, 0.05)),
                albumentations.ImageCompression(
                    quality_lower=70, quality_upper=100, compression_type=1
                ),
                albumentations.MultiplicativeNoise(
                    multiplier=(0.95, 1.05), per_channel=True, elementwise=True
                ),
                albumentations.Downscale(
                    scale_min=0.6, scale_max=0.99, interpolation=4
                ),
            ],
            p=0.5,
        ),
        albumentations.OneOf(
            [
                albumentations.HueSaturationValue(
                    hue_shift_limit=(-7, 7),
                    sat_shift_limit=(-10, 10),
                    val_shift_limit=(-10, 10),
                ),
                albumentations.RandomBrightnessContrast(
                    brightness_limit=0.15,
                    contrast_limit=0.2,
                    brightness_by_max=True,
                ),
            ],
            p=0.5,
        ),
        albumentations.OneOf(
            [
                albumentations.OpticalDistortion(
                    distort_limit=0.05,
                    shift_limit=0.05,
                    border_mode=2,
                ),
                albumentations.ElasticTransform(
                    alpha=2.0,
                    sigma=50.0,
                    alpha_affine=10.0,
                    interpolation=0,
                    border_mode=2,
                ),
                albumentations.GridDistortion(
                    num_steps=5, distort_limit=0.3, interpolation=0, border_mode=2
                ),
            ],
            p=0.5,
        ),
        albumentations.OneOf(
            [
                albumentations.HorizontalFlip(),
                albumentations.VerticalFlip(),
            ],
            p=0.5,
        ),
        albumentations.OneOf(
            [
                albumentations.Rotate(
                    limit=(-180, 180), interpolation=0, border_mode=2
                ),
                albumentations.ShiftScaleRotate(
                    shift_limit=0.05,
                    scale_limit=0.05,
                    rotate_limit=180,
                    interpolation=0,
                    border_mode=2,
                ),
            ],
            p=0.5,
        ),
    ]
)

In [9]:
class Generator(keras.utils.Sequence):
    def __init__(
        self,
        df,
        images_src_dir,
        batch_size,
        target_image_size,
        shuffle=False,
        augment = True,
        crop = False,
        resize=False,
        normalize=False
    ):
        self.shuffle = shuffle
        self.batch_size = BATCH_SIZE
        self.df = df
        self.images_dir = images_src_dir
        self.target_image_size = (IMAGE_SIZE, IMAGE_SIZE)
        self.augment = augment
        self.crop = crop
        self.resize = resize
        self.normalize = normalize
        # create label index map
        self.labels = self._read_labels()
        self.n_samples = self.df.shape[0]
        self.n_batches = self.n_samples // self.batch_size
        # shuffle data, also repeated after each epoch if needed
        if self.shuffle:
            np.random.shuffle(self.labels)

    def _read_labels(self):
        """
        Returns list images mapping to 1-hot label
        """


        # label indexes 
        label_ixs = self.df[feature_columns].values
        image_ixs = self.df['image'].values
        labels = []

        for i in range(len(image_ixs)):
            labels.append([image_ixs[i], label_ixs[i]])
        return labels
    
    def __len__(self):
        """
        Length in batches
        """
        return self.n_batches

    def __getitem__(self, b_ix):
        """
        Produce batch, by batch index
        """

        assert b_ix < self.n_batches

        b_X = np.zeros(
            (self.batch_size, self.target_image_size[0], self.target_image_size[1], 3),
            dtype=np.uint8,
        )

        b_Y = np.zeros(
            (self.batch_size, self.df[feature_columns].shape[1]),
            dtype=np.uint8,
        )

        for i in range(self.batch_size):
            b_X[i], b_Y[i] = self.get_one(
                i + self.batch_size * b_ix,
            )

        return (b_X, b_Y)

    def get_one(
        self, one_ix
    ):
        """
        Get single item by absolute index
        """
        id = self.labels[one_ix][0]
        src_file = self.images_dir+id

        # read file
        x = np.load(src_file)
        if self.crop:
            coord = self.df[self.df["image"] == id][['x_min', 'y_min', 'x_max', 'y_max']].values[0]
            orig_hight = x.shape[0]
            orig_width = x.shape[1]
            x_min = coord[0]
            y_min = coord[1]
            x_max = coord[2]
            y_max = coord[3]
            x = x[np.int(y_min*orig_hight):np.int(y_max*orig_hight),np.int(x_min*orig_width):np.int(x_max*orig_width)]

        y = self.labels[one_ix][1]

        # augment
        if self.augment:
            x = self._augment_image(x)

        # normalize (sample-wise)
        if self.normalize:
            x = x.astype(np.float32)
            x = x - np.mean(x, axis=(0, 1))
            x = x / np.std(x, axis=(0, 1))
        return x.astype(np.uint8), y

    def _augment_image(self, x):
        """
        Randomply augment image
        """

        x = transform(image=x)['image']
        return x

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.labels)

In [10]:
# gen = Generator(
#     df=df_labels.head(1).sample(100, replace=True),
#     images_src_dir=TRAIN_IMG_PATH,
#     target_image_size=IMAGE_SIZE,
#     batch_size = BATCH_SIZE,
#     shuffle=True, augment=True, crop=False
# )
# gen.labels[9]
# plt.imshow(gen.get_one(1)[0])

# StratifiedShuffleSplit

In [11]:
# df_without_complex
# train, valid = train_test_split(df_without_complex, train_size=0.8, random_state=SEED)

In [12]:
# gen_train = Generator(
#     df=train,
#     images_src_dir=TRAIN_IMG_PATH,
#     target_image_size=IMAGE_SIZE,
#     batch_size = BATCH_SIZE,
#     shuffle=True, augment=True, crop=False, resize=False
# )
# gen_valid = Generator(
#     df=valid,
#     images_src_dir=TRAIN_IMG_PATH,
#     target_image_size=IMAGE_SIZE,
#     batch_size = BATCH_SIZE,
#     shuffle=False, augment=False, crop=False, resize=False
# )

In [13]:
# len(gen_valid.labels)
# x0, y0 = gen_train.get_one(55)
# plt.imshow(x0)

In [14]:
policy = keras.mixed_precision.experimental.Policy('mixed_float16')
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Quadro RTX 5000, compute capability 7.5


In [15]:
def get_model():
    inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    base_model = keras.applications.EfficientNetB4(weights=None, include_top=False)
    base_model.load_weights(
        "/app/_data/models/efficientnet-b4_noisy-student_notop.h5",
        by_name=True,
        skip_mismatch=True,
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(lr=0.0005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="weighted"),
        ],
    )
    return model

In [27]:
skf = StratifiedKFold(n_splits=5, random_state=SEED, shuffle=True)

for i, (train_index, valid_index) in enumerate(skf.split(df_without_complex["image"], df_without_complex['labels'])):
    train, valid = df_without_complex.loc[train_index], df_without_complex.loc[valid_index]
    model_name = "eff4_ns_cr_complex_kf"+str(i+1)+".h5"
    callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=10,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/complex/"+model_name,
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.8,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
]
    model = get_model()
    gen_train = Generator(
    df=train,
    images_src_dir=TRAIN_IMG_PATH,
    target_image_size=IMAGE_SIZE,
    batch_size = BATCH_SIZE,
    shuffle=True, augment=True, crop=False, resize=False
)
    gen_valid = Generator(
    df=valid,
    images_src_dir=TRAIN_IMG_PATH,
    target_image_size=IMAGE_SIZE,
    batch_size = BATCH_SIZE,
    shuffle=False, augment=False, crop=False, resize=False
)

 history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=100,
        steps_per_epoch=(train.shape[0] * 0.8) // BATCH_SIZE,
        validation_steps=(valid.shape[0] * 0.2) // BATCH_SIZE,
        verbose=1,
        use_multiprocessing=True,
        callbacks=callbacks,
    )

Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.90236, saving model to /app/_data/models/complex/eff4_ns_cr_complex_kf1.h5
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.90236 to 0.92283, saving model to /app/_data/models/complex/eff4_ns_cr_complex_kf1.h5
Epoch 3/100

Epoch 00003: val_f1_score did not improve from 0.92283
Epoch 4/100

Epoch 00004: val_f1_score did not improve from 0.92283
Epoch 5/100

Epoch 00005: val_f1_score did not improve from 0.92283
Epoch 6/100

Epoch 00006: val_f1_score did not improve from 0.92283
Epoch 7/100

Epoch 00007: val_f1_score did not improve from 0.92283

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0004000000189989805.
Epoch 8/100

Epoch 00008: val_f1_score improved from 0.92283 to 0.92708, saving model to /app/_data/models/complex/eff4_ns_cr_complex_kf1.h5
Epoch 9/100

Epoch 00009: val_f1_score improved from 0.92708 to 0.92843, saving model to /app/_data/models/complex/eff4_ns_cr_complex_kf1.h5
Epoch 10/100

Epoc

Process Keras_worker_ForkPoolWorker-207:
Process Keras_worker_ForkPoolWorker-263:
Process Keras_worker_ForkPoolWorker-229:
Process Keras_worker_ForkPoolWorker-285:
Process Keras_worker_ForkPoolWorker-75:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  F

In [24]:
# base_model = keras.applications.EfficientNetB4(weights=None, include_top=False)
# base_model.load_weights('/app/_data/models/efficientnet-b4_noisy-student_notop.h5', by_name=True, skip_mismatch = True)

In [25]:
# inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
# base_model = keras.applications.EfficientNetB4(weights=None, include_top=False)
# base_model.load_weights(
#     "/app/_data/models/efficientnet-b4_noisy-student_notop.h5",
#     by_name=True,
#     skip_mismatch=True,
# )
# x = base_model(inputs)
# x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
# x = keras.layers.Flatten(name="flatten")(x)
# outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
# model = keras.Model(inputs=inputs, outputs=outputs)

In [26]:
# model.compile(
#     loss="binary_crossentropy",
#     optimizer=Adam(lr=0.0005),
#     metrics=[
#         "acc",
#         keras.metrics.Recall(),
#         keras.metrics.Precision(),
#         tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="weighted"),
#     ],
# )

In [27]:
# history = model.fit(
#     gen_train,
#     validation_data=gen_valid,
#     epochs=100,
#     steps_per_epoch=(train.shape[0] * 0.8) // BATCH_SIZE,
#     validation_steps=(valid.shape[0] * 0.2) // BATCH_SIZE,
#     verbose=1,
#     use_multiprocessing=True,
#     callbacks=callbacks,
# )

In [28]:
def plot_curves(model, loss=False):
    df = pd.DataFrame(model.history)
    if loss:
        plt.figure(figsize=(12, 6))
        plt.title("loss")
        plt.plot(df.iloc[:, 0], label="loss")
        plt.plot(df.iloc[:, 5], label="val_loss")
        plt.legend()
        plt.show()
    else:
        plt.figure(figsize=(12, 20))
        plt.subplot(4, 1, 1)
        plt.title("accuracy")
        plt.plot(df.iloc[:, 1], label="accuracy")
        plt.plot(df.iloc[:, 6], label="val_accuracy")
        plt.legend()

        plt.subplot(4, 1, 2)
        plt.title("recall")
        plt.plot(df.iloc[:, 2], label="recall")
        plt.plot(df.iloc[:, 7], label="val_recall")
        plt.legend()

        plt.subplot(4, 1, 3)
        plt.title("precision")
        plt.plot(df.iloc[:, 3], label="precision")
        plt.plot(df.iloc[:, 8], label="val_precision")
        plt.legend()

        plt.subplot(4, 1, 4)
        plt.title("f1")
        plt.plot(df.iloc[:, 4], label="f1")
        plt.plot(df.iloc[:, 9], label="val_f1")
        plt.legend()
        plt.show();

In [29]:
# plot_curves(history, loss=False)

In [30]:
# model = keras.models.load_model('/app/_data/models/albumentations/eff4_ns_cr_complex_1.h5')

In [10]:
def pred_to_labels(pred, thresh=0.5, labels=feature_columns):
    pred = [labels[i] for i in range(len(labels)) if pred[i] > thresh]
    pred = " ".join(pred)
    return pred
def predict_new(path, model):
    img = np.load(TRAIN_IMG_PATH+path)
    img = tf.expand_dims(img,axis = 0)
    pred = model.predict(img)
    return pred_to_labels(pred[0])

In [11]:
# df_sub = pd.DataFrame(columns=['image','labels'])
# for model_path in os.listdir('/app/_data/models/complex/'):
#     model = keras.models.load_model('/app/_data/models/complex/'+model_path)
#     for img_name in df_complex['image'].tolist():
#         pred = predict_new(img_name, model)

#         df_sub = df_sub.append( {'image': img_name, 'labels': pred}, ignore_index = True )

#     print(df_sub.head())


In [12]:
df_sub = df_sub.join(df_sub['labels'].str.get_dummies(' '))

NameError: name 'df_sub' is not defined

In [None]:
len(df_sub[df_sub['complex']==1])

In [13]:
df_sub = pd.read_csv("/app/_data/df_csv/complex_predicted_kf.csv", index_col=[0])

In [14]:
# df_sub['complex']=1

In [15]:
df_sub

Unnamed: 0,image,labels,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
0,a9e0c5d79494de98.npy,frog_eye_leaf_spot scab,0,1,0,0,0,1
1,e068953ecb74c535.npy,frog_eye_leaf_spot scab,0,1,0,0,0,1
2,cfab6e1e20b690b1.npy,scab,0,0,0,0,0,1
3,a9b116c36b47e307.npy,complex rust,1,0,0,0,1,0
4,e1dee4124f2155b3.npy,frog_eye_leaf_spot scab,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...
7905,bf22f63ed33ac010.npy,complex rust,1,0,0,0,1,0
7906,ca1be18029db0bfe.npy,complex rust,1,0,0,0,1,0
7907,bf76e8d3c0c045c5.npy,frog_eye_leaf_spot,0,1,0,0,0,0
7908,d03fe072c99b26cc.npy,scab,0,0,0,0,0,1


In [16]:
df_pivot = df_sub.pivot_table(index = 'image', aggfunc=sum)

In [17]:
df_pivot[df_pivot<=4]=0

In [18]:
df_pivot[df_pivot>4]=1

In [19]:
df_pivot['complex'] = 1

In [20]:
for i in df_pivot.index.tolist():
    pred = pred_to_labels(df_pivot.loc[i].values)
    df_pivot.loc[i, 'labels'] = pred

In [21]:
df_pivot.sample()

Unnamed: 0_level_0,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab,labels
image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ded57ce32063350a.npy,1,1,0,0,0,1,complex frog_eye_leaf_spot scab


In [22]:
df_pivot['labels'].value_counts()

complex frog_eye_leaf_spot         668
complex frog_eye_leaf_spot scab    374
complex scab                       198
complex                            156
complex rust                       152
complex powdery_mildew              20
complex rust scab                    8
complex frog_eye_leaf_spot rust      4
complex healthy                      2
Name: labels, dtype: int64

In [23]:
df_pivot['image'] = df_pivot.index
df_pivot = df_pivot.reset_index(drop=True)

In [24]:
df_all = pd.concat([df_without_complex, df_pivot], axis=0, ignore_index=True)

In [25]:
df_all = df_all.sample(frac=1)

In [26]:
df_all

Unnamed: 0,image,labels,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
12230,cd9dc8e19f180d0f.npy,rust,0,0,0,0,1,0
21037,f299a1e1e9c6154b.npy,complex frog_eye_leaf_spot,1,1,0,0,0,0
18916,Train_722.npy,rust,0,0,0,0,1,0
7462,85cb3b21f8adc156.npy,powdery_mildew complex,1,0,0,1,0,0
14385,c8841f4c7e7a31ba.npy,scab,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...
7118,e3bbd9b22d263230.npy,frog_eye_leaf_spot,0,1,0,0,0,0
19845,91db9b9a7cf40407.npy,complex frog_eye_leaf_spot scab,1,1,0,0,0,1
19745,860eeec66f368ac4.npy,complex frog_eye_leaf_spot scab,1,1,0,0,0,1
15442,c0c3773f333a0f11.npy,scab,0,0,0,0,0,1


# StratifiedShuffleSplit

In [27]:
policy = keras.mixed_precision.experimental.Policy('mixed_float16')
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Quadro RTX 5000, compute capability 7.5


In [28]:
def get_model():
    inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    base_model = keras.applications.EfficientNetB4(weights=None, include_top=False)
    base_model.load_weights(
        "/app/_data/models/efficientnet-b4_noisy-student_notop.h5",
        by_name=True,
        skip_mismatch=True,
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(lr=0.0005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="weighted"),
        ],
    )
    return model

In [29]:
# inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
# base_model = keras.applications.EfficientNetB4(weights=None, include_top=False)
# base_model.load_weights(
#     "/app/_data/models/efficientnet-b4_noisy-student_notop.h5",
#     by_name=True,
#     skip_mismatch=True,
# )
# x = base_model(inputs)
# x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
# x = keras.layers.Flatten(name="flatten")(x)
# outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
# model_eff4 = keras.Model(inputs=inputs, outputs=outputs)

In [30]:
skf = StratifiedKFold(n_splits=5, random_state=SEED, shuffle=True)

for i, (train_index, valid_index) in enumerate(skf.split(df_all["image"], df_all['labels'])):
    train, valid = df_all.loc[train_index], df_all.loc[valid_index]
    model_name = "eff4_ns_cr_with_predicted_complex_v2_kf"+str(i+1)+".h5"
    callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=11,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/complex/"+model_name,
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.8,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
        keras.callbacks.TensorBoard(
            log_dir="/app/.tensorboard/logs4/", histogram_freq=0
        )
]
    model = get_model()
    gen_train = Generator(
    df=train,
    images_src_dir=TRAIN_IMG_PATH,
    target_image_size=IMAGE_SIZE,
    batch_size = BATCH_SIZE,
    shuffle=True, augment=True, crop=False, resize=False
)
    gen_valid = Generator(
    df=valid,
    images_src_dir=TRAIN_IMG_PATH,
    target_image_size=IMAGE_SIZE,
    batch_size = BATCH_SIZE,
    shuffle=False, augment=False, crop=False, resize=False
)

    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=100,
        steps_per_epoch=train.shape[0] // BATCH_SIZE,
        validation_steps=valid.shape[0] // BATCH_SIZE,
        verbose=1,
        workers=10,
#         use_multiprocessing=True,
        callbacks=callbacks,
    )
    keras.backend.clear_session()

Epoch 1/100
Epoch 00001: val_f1_score improved from -inf to 0.85384, saving model to /app/_data/models/complex/eff4_ns_cr_with_predicted_complex_v2_kf1.h5
Epoch 2/100
Epoch 00002: val_f1_score improved from 0.85384 to 0.86145, saving model to /app/_data/models/complex/eff4_ns_cr_with_predicted_complex_v2_kf1.h5
Epoch 3/100
Epoch 00003: val_f1_score improved from 0.86145 to 0.87576, saving model to /app/_data/models/complex/eff4_ns_cr_with_predicted_complex_v2_kf1.h5
Epoch 4/100
Epoch 00004: val_f1_score did not improve from 0.87576
Epoch 5/100
Epoch 00005: val_f1_score improved from 0.87576 to 0.87964, saving model to /app/_data/models/complex/eff4_ns_cr_with_predicted_complex_v2_kf1.h5
Epoch 6/100
Epoch 00006: val_f1_score did not improve from 0.87964
Epoch 7/100
Epoch 00007: val_f1_score did not improve from 0.87964
Epoch 8/100
Epoch 00008: val_f1_score did not improve from 0.87964
Epoch 9/100
Epoch 00009: val_f1_score did not improve from 0.87964
Epoch 10/100
Epoch 00010: val_f1_sco

In [31]:
import kaggle



In [32]:
! kaggle datasets init -p /app/_data/models/complex/eff4PredictedComplex1/

Data package template written to: /app/_data/models/complex/eff4PredictedComplex1/dataset-metadata.json


In [None]:
! kaggle datasets create -p /app/_data/models/complex/eff4PredictedComplex1/

Starting upload for file eff4_ns_cr_with_predicted_complex_v2_kf2.h5
100%|█████████████████████████████████████████| 203M/203M [04:54<00:00, 724kB/s]
Upload successful: eff4_ns_cr_with_predicted_complex_v2_kf2.h5 (203MB)
Starting upload for file eff4_ns_cr_with_predicted_complex_v2_kf1.h5
100%|█████████████████████████████████████████| 203M/203M [04:52<00:00, 729kB/s]
Upload successful: eff4_ns_cr_with_predicted_complex_v2_kf1.h5 (203MB)
Starting upload for file eff4_ns_cr_with_predicted_complex_v2_kf4.h5
100%|█████████████████████████████████████████| 203M/203M [04:53<00:00, 726kB/s]
Upload successful: eff4_ns_cr_with_predicted_complex_v2_kf4.h5 (203MB)
Starting upload for file eff4_ns_cr_with_predicted_complex_v2_kf5.h5
100%|█████████████████████████████████████████| 203M/203M [04:52<00:00, 729kB/s]
Upload successful: eff4_ns_cr_with_predicted_complex_v2_kf5.h5 (203MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Starting upload for file eff4_ns_cr_with_pre