In [1]:
import os
import json
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt

%matplotlib inline

import sklearn
import tensorflow as tf
import tensorflow_addons as tfa
from PIL import Image

from sklearn.model_selection import (
    StratifiedKFold,
    StratifiedShuffleSplit,
    train_test_split,
)
from tensorflow import keras
from tensorflow.keras.applications.efficientnet import EfficientNetB4, EfficientNetB7, EfficientNetB0
from tensorflow.keras.layers import (
    AveragePooling2D,
    AvgPool2D,
    Conv2D,
    Dense,
    Flatten,
    GlobalAveragePooling2D,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import notebook, tqdm

In [2]:
# Root data directory; the other locations below are spelled relative to it.
PATH = "/app/_data/"

# JSON files that are loaded and parsed for bounding-box annotations.
PATH_JSON1 = f"{PATH}json/1071_big.json"
PATH_JSON2 = f"{PATH}json/part_2_bb.json"

# Image folders.
TRAIN_IMG_PATH = f"{PATH}train_images/"
TEST_IMG_PATH = f"{PATH}test_images/"

# Training hyper-parameters.
BATCH_SIZE = 64
SEED = 37
IMAGE_SIZE = 224  # images are resized to IMAGE_SIZE x IMAGE_SIZE
NUM_CLASSES = 6

In [3]:
# --- train_labels.csv: one-hot encode the space-separated "labels" strings ---
labels = pd.read_csv(PATH + "train_labels.csv")
sample_submission = pd.read_csv(PATH + "sample_submission.csv")
labels = labels.join(labels["labels"].str.get_dummies(sep=" "))
# Everything after the first two columns (image, labels) is a one-hot class column.
feature_columns = labels.columns[2:].tolist()

# --- train_20.csv: rename to the same schema and drop the "complex" rows ---
labels_2020 = pd.read_csv(PATH + "train_20.csv")
labels_2020["image_id"] = labels_2020["image_id"] + ".jpg"
labels_2020.columns = ["image", "healthy", "complex", "rust", "scab"]
labels_2020 = labels_2020.query("complex !=1").reset_index(drop=True)
labels_2020 = labels_2020[["image", "healthy", "rust", "scab"]]
feature_columns_20 = ["healthy", "rust", "scab"]

# Rebuild a label string per row from the 0/1 flags: each flag repeats its
# column name 0 or 1 times and the pieces are concatenated. Built in a single
# pass instead of one `.loc` write per row, and the column exists even when
# the frame is empty.
labels_2020 = labels_2020.assign(
    labels_20=[
        "".join(name * int(flag) for flag, name in zip(row, feature_columns_20))
        for row in labels_2020[feature_columns_20].values
    ]
)

# --- stack both tables; class columns missing from train_20.csv become 0 ---
labels_21_20 = pd.concat([labels, labels_2020], axis=0, join="outer", ignore_index=True)
labels_21_20[feature_columns] = labels_21_20[feature_columns].fillna(0).astype("int")
# Rows coming from train_20.csv have no "labels" value; take it from "labels_20".
labels_21_20["labels"] = labels_21_20["labels"].fillna(labels_21_20["labels_20"])
labels_21_20 = labels_21_20.drop("labels_20", axis=1)
labels_21_20.head(2)

Unnamed: 0,image,labels,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
0,800113bb65efe69e.jpg,healthy,0,0,1,0,0,0
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,1,1,0,0,0,1


In [4]:
# Load both annotation files. The encoding is pinned to UTF-8 so that parsing
# does not depend on the locale default of the machine running the notebook.
with open(PATH_JSON1, "r", encoding="utf-8") as read_file:
    data1 = json.load(read_file)
with open(PATH_JSON2, "r", encoding="utf-8") as read_file:
    data2 = json.load(read_file)

In [5]:
# Append every entry of data2 to data1 in place.
# NOTE(review): this mutates data1, so re-running the cell without reloading
# the JSON files appends data2 a second time.
data1.extend(data2)

In [6]:
# Number of entries currently held in data1.
len(data1)

1564

In [7]:
# Build one row per image from the annotation entries, indexed by image name.
# Only the first result of the first annotation of each entry is used.
data = data1
bbox_columns = [
    "image",
    "original_width",
    "original_height",
    "x_min",
    "y_min",
    "width",
    "height",
    "rotation",
]

# Keyed by image name: a repeated name keeps its first position and takes the
# values of its last occurrence, exactly as repeated `df.loc[img, ...]` writes did.
records = {}
for entry in data:
    # Keep the upload name up to the first "." and then up to the first "_",
    # and re-attach the ".jpg" extension.
    img = entry["file_upload"].split(".")[0].split("_")[0] + ".jpg"
    result = entry["annotations"][0]["result"][0]
    box = result["value"]
    records[img] = {
        "image": img,
        "original_width": result["original_width"],
        "original_height": result["original_height"],
        "x_min": box["x"],
        "y_min": box["y"],
        "width": box["width"],
        "height": box["height"],
        "rotation": box["rotation"],
    }

# Construct the frame once instead of growing it cell by cell.
df = pd.DataFrame(
    list(records.values()), index=list(records.keys()), columns=bbox_columns
)
# Numeric columns are stored as floats, matching what incremental filling produced.
df[bbox_columns[1:]] = df[bbox_columns[1:]].astype("float64")

In [8]:
# Count rows whose image name repeats an earlier row; 0 means all names are distinct.
df['image'].duplicated().sum()

0

In [9]:
# Columns of df passed (in this order) to the coordinate helpers as
# [x, y, width, height]; presumably percentages of the image size — confirm.
col_coord = ['x_min', 'y_min', 'width', 'height']
def get_real_coord1(coord, img_width, img_height):
    """Scale a percentage box to pixels, keeping the width/height layout.

    Args:
        coord: indexable whose first four items are [x, y, width, height],
            each expressed as a percentage (0-100) of the image size.
        img_width: image width used to scale x and width.
        img_height: image height used to scale y and height.

    Returns:
        list: [pixel_x, pixel_y, pixel_width, pixel_height].
    """
    # Horizontal quantities scale with the width, vertical ones with the height.
    return [
        coord[0] / 100.0 * img_width,
        coord[1] / 100.0 * img_height,
        coord[2] / 100.0 * img_width,
        coord[3] / 100.0 * img_height,
    ]

def get_real_coord2(coord, img_width, img_height):
    """Scale a percentage box to pixels and return it in corner format.

    Args:
        coord: indexable whose first four items are [x, y, width, height],
            each expressed as a percentage (0-100) of the image size.
        img_width: image width used to scale x and width.
        img_height: image height used to scale y and height.

    Returns:
        list: [x_min, y_min, x_max, y_max] in pixels.
    """
    left = coord[0] / 100.0 * img_width
    top = coord[1] / 100.0 * img_height
    # The far corner is the near corner shifted by the scaled box size.
    right = left + coord[2] / 100.0 * img_width
    bottom = top + coord[3] / 100.0 * img_height
    return [left, top, right, bottom]

def get_percent_coord(coord):
    """Convert [x, y, width, height] to corner format without rescaling.

    Args:
        coord: indexable whose first four items are [x, y, width, height].

    Returns:
        list: [x_min, y_min, x_max, y_max] in the same units as the input.
    """
    left, top = coord[0], coord[1]
    box_width, box_height = coord[2], coord[3]
    return [left, top, left + box_width, top + box_height]

In [10]:
# Spot-check both converters on the first row of df: once scaled to a
# 1000 x 1000 image, once left in the stored units.
first_box = df[col_coord].values[0]
get_real_coord2(first_box, 1000, 1000)
get_percent_coord(first_box)

[0.0, 157.76582834147987, 973.3333333333331, 850.2994011976051]

[0.0, 15.776582834147987, 97.33333333333331, 85.02994011976051]

In [11]:
# Far corner of each box: x_max = x_min + width, y_max = y_min + height.
# Same arithmetic as get_percent_coord, applied to whole columns at once
# instead of one `.loc` read/write pair per row.
df['x_max'] = df['x_min'] + df['width']
df['y_max'] = df['y_min'] + df['height']

In [12]:
# Corner-format box columns; used below as the regression targets (y_col).
xy_columns = ['x_min', 'y_min', 'x_max', 'y_max']

In [13]:
# Divide the corner coordinates by 100 (presumably percent -> 0-1 fraction,
# matching the sigmoid outputs of the model — confirm).
# NOTE(review): this overwrites the columns in place, so the cell is not
# idempotent: running it twice divides by 100 twice.
df[xy_columns] = df[xy_columns]/100

In [14]:
# Generator without augmentation; validation_split reserves 10% of the rows
# for the "validation" subset.
gen = ImageDataGenerator(validation_split=0.1)

# Training iterator: images resized to IMAGE_SIZE, the four corner columns as
# raw (unencoded) targets, shuffled with a fixed seed.
train_flow_options = dict(
    dataframe=df,
    directory=TRAIN_IMG_PATH,
    x_col="image",
    y_col=xy_columns,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="raw",
    interpolation='bicubic',
    shuffle=True,
    subset='training',
    seed=SEED,
)
train = gen.flow_from_dataframe(**train_flow_options)

Found 1365 validated image filenames.


In [15]:
# Validation iterator: same settings as the training one, but reading the
# "validation" subset and keeping the row order (no shuffling).
valid_flow_options = dict(
    dataframe=df,
    directory=TRAIN_IMG_PATH,
    x_col="image",
    y_col=xy_columns,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="raw",
    interpolation='bicubic',
    shuffle=False,
    subset='validation',
    seed=SEED,
)
valid = gen.flow_from_dataframe(**valid_flow_options)

Found 151 validated image filenames.


In [16]:
# Optional mixed-precision policy; intentionally left disabled (commented out).
# policy = keras.mixed_precision.experimental.Policy('mixed_float16')
# keras.mixed_precision.experimental.set_policy(policy)

In [17]:
# Bounding-box regressor: ImageNet-pretrained EfficientNetB0 backbone (no
# classification head) -> global average pooling -> 4 sigmoid outputs.
inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
base_model = keras.applications.EfficientNetB0(weights='imagenet', include_top=False)
x = base_model(inputs)
x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
# Kept so the layer list stays the same as in previously saved models; the
# pooled features should already be flat at this point.
x = keras.layers.Flatten(name="flatten")(x)
# One output per target column (xy_columns); sigmoid bounds each to (0, 1).
outputs = keras.layers.Dense(4, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    loss="mse",
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer=Adam(learning_rate=0.0005),
    # Reported as "mae" / "val_mae", which the callbacks monitor.
    metrics=["mae"],
)

In [18]:
# All three callbacks watch the same validation metric (lower is better).
monitored_metric = "val_mae"

# Stop after 20 epochs without improvement and roll back to the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor=monitored_metric,
    patience=20,
    restore_best_weights=True,
    verbose=1,
    mode='min',
)

# Save the full model (not only weights) whenever the metric improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    "/app/_data/models/bb/bb_b0_1516_1.h5",
    monitor=monitored_metric,
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode="min",
    save_freq="epoch",
)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, down to 1e-9.
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.2,
    patience=5,
    verbose=1,
    mode="min",
    min_delta=1e-4,
    min_lr=1e-9,
)

callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

In [None]:
# Train for up to 100 epochs; early stopping in `callbacks` may end it sooner.
# `batch_size` is deliberately not passed: `train` and `valid` are batch
# iterators that were already created with batch_size=BATCH_SIZE, and Keras
# expects the argument to be omitted for generator/Sequence inputs.
history = model.fit(
    train,
    validation_data=valid,
    epochs=100,
    verbose=1,
    use_multiprocessing=True,
    callbacks=callbacks,
)

Epoch 1/100

Epoch 00001: val_mae improved from inf to 0.10498, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 2/100

Epoch 00002: val_mae improved from 0.10498 to 0.08671, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 3/100

Epoch 00003: val_mae improved from 0.08671 to 0.06941, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 4/100

Epoch 00004: val_mae improved from 0.06941 to 0.06147, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 5/100

Epoch 00005: val_mae improved from 0.06147 to 0.06068, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 6/100

Epoch 00006: val_mae improved from 0.06068 to 0.05583, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 7/100

Epoch 00007: val_mae improved from 0.05583 to 0.05416, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 8/100

Epoch 00008: val_mae improved from 0.05416 to 0.05061, saving model to /app/_data/models/bb/bb_b0_1516_1.h5
Epoch 9/100

Epoch 00009: val_mae im