In [1]:
# !pip install venn

In [67]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from PIL import Image

from tensorflow.keras.layers.experimental import preprocessing

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline


import sklearn
import tensorflow as tf
import tensorflow_addons as tfa

from tensorflow import keras

from tensorflow.keras.applications.resnet import ResNet50, resnet
from tensorflow.keras.layers import (
    AveragePooling2D,
    AvgPool2D,
    Conv2D,
    Dense,
    Flatten,
    GlobalAveragePooling2D,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from venn import venn

In [3]:
PATH = "/app/_data/"
BATCH_SIZE = 64
SEED = 42

In [35]:
labels = pd.read_csv(PATH + "train.csv")
sample_submission = pd.read_csv(PATH + 'sample_submission.csv')

In [48]:
sample_submission

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,healthy
1,ad8770db05586b59.jpg,healthy
2,c7b03e718489f3ca.jpg,healthy


In [36]:
labels.head()
labels.info()

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18632 entries, 0 to 18631
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   image   18632 non-null  object
 1   labels  18632 non-null  object
dtypes: object(2)
memory usage: 291.2+ KB


In [37]:
labels["labels"].value_counts(normalize=True)

scab                               0.259017
healthy                            0.248175
frog_eye_leaf_spot                 0.170728
cider_apple_rust                   0.099828
complex                            0.085981
powdery_mildew                     0.063547
scab frog_eye_leaf_spot            0.036818
scab frog_eye_leaf_spot complex    0.010734
frog_eye_leaf_spot complex         0.008856
rust frog_eye_leaf_spot            0.006441
rust complex                       0.005206
powdery_mildew complex             0.004669
Name: labels, dtype: float64

In [38]:
labels = labels.join(labels["labels"].str.get_dummies(sep=" "))

In [62]:
feature_columns = labels.columns[2:9].tolist()
feature_columns

['cider_apple_rust',
 'complex',
 'frog_eye_leaf_spot',
 'healthy',
 'powdery_mildew',
 'rust',
 'scab']

In [39]:
labels.head()

Unnamed: 0,image,labels,cider_apple_rust,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
0,800113bb65efe69e.jpg,healthy,0,0,0,1,0,0,0
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,0,1,1,0,0,0,1
2,80070f7fb5e2ccaa.jpg,scab,0,0,0,0,0,0,1
3,80077517781fb94f.jpg,scab,0,0,0,0,0,0,1
4,800cbf0ff87721f8.jpg,complex,0,1,0,0,0,0,0


In [40]:
def resize_im(
    image_name, new_folder_name="small_nearest", size=(224, 224), resample=Image.NEAREST
):
    image = Image.open(PATH + "train_images/" + image_name)
    image = image.resize(size, resample=resample)
    if not os.path.isdir(PATH + "train_images/" + new_folder_name + "/"):
        os.mkdir(PATH + "train_images/" + new_folder_name + "/")
    image.save(PATH + "train_images/" + new_folder_name + "/" + image_name)

In [41]:
# labels['image'].apply(resize_im)

In [42]:
labels["multi_label"] = labels["labels"].astype("category").cat.codes

In [43]:
dict_weights = (
    1
    / labels["multi_label"].value_counts()
    / np.sum(1 / labels["multi_label"].value_counts())
).to_dict()

In [44]:
def weight_fill(x):
   return dict_weights[x]

In [45]:
labels['weights'] = labels["multi_label"].apply(weight_fill)

In [46]:
labels.head()

Unnamed: 0,image,labels,cider_apple_rust,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab,multi_label,weights
0,800113bb65efe69e.jpg,healthy,0,0,0,1,0,0,0,4,0.004764
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,0,1,1,0,0,0,1,11,0.110133
2,80070f7fb5e2ccaa.jpg,scab,0,0,0,0,0,0,1,9,0.004564
3,80077517781fb94f.jpg,scab,0,0,0,0,0,0,1,9,0.004564
4,800cbf0ff87721f8.jpg,complex,0,1,0,0,0,0,0,1,0.013749


In [95]:
train_datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
# #     samplewise_center=True, 
# #     samplewise_std_normalization=True, 
#     rotation_range = 45,width_shift_range=0.25,
#     height_shift_range=0.25,
#     brightness_range=(0.5, 1.5),
#     shear_range = 45,
#     zoom_range = 0.25,
#     horizontal_flip=True,
#     vertical_flip=True
)
train = train_datagen.flow_from_dataframe(
    dataframe=labels,
    directory=PATH + "train_images/small_nearest/",
    x_col="image",
    y_col=feature_columns,
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode="raw",
    subset="training",
    seed=SEED,
)

Found 14906 validated image filenames.


In [92]:
valid_datagen = ImageDataGenerator(
    validation_split=0.2,
#     rescale=1./255,
# #     samplewise_center=True,
# #     samplewise_std_normalization=True,
#     rotation_range = 45,
#     width_shift_range=0.25,
#     height_shift_range=0.25,
#     brightness_range=(0.5, 1.5),
#     shear_range = 45,
#     zoom_range = 0.25,
#     horizontal_flip=True,
#     vertical_flip=True
)
valid = valid_datagen.flow_from_dataframe(
    dataframe=labels,
    directory=PATH + "train_images/small_nearest/",
    x_col="image",
    y_col=feature_columns,
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode="raw",
    subset="validation",
    seed=SEED,
)

Found 3726 validated image filenames.


In [50]:
test_datagen = ImageDataGenerator(
    rescale=1./255)
test = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH + "test_images/",
    x_col="image",
    y_col=None,
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode=None,
    seed=SEED,
    shuffle=False
)

Found 3 validated image filenames.


In [None]:
backbone = ResNet50(input_shape=(224, 224, 3), weights="imagenet", include_top=False)
model_w = Sequential()
model_w.add(backbone)
model_w.add(GlobalAveragePooling2D())
model_w.add(Dense(units=7, activation="sigmoid"))
model_w.compile(
    loss="CategoricalCrossentropy",
    optimizer=Adam(lr=0.001),
    metrics=[keras.metrics.CategoricalAccuracy(), keras.metrics.Recall(), keras.metrics.Precision(), tfa.metrics.F1Score(
                    num_classes=7, 
                    average='macro')]
)

In [None]:
keras.preprocessing.image.random_shear(45)


In [146]:
data_augmentation = keras.Sequential(
    [
        preprocessing.RandomFlip("horizontal_and_vertical"),
        preprocessing.RandomHeight(0.1),
        preprocessing.RandomWidth(0.1),
        preprocessing.RandomRotation(0.15),
        preprocessing.RandomZoom(0.25, 0.25),
#         keras.preprocessing.image.random_shear(45),
#         keras.preprocessing.image.random_rotation(45),
#         keras.preprocessing.image.random_brightness(0.5, 1.5),
#         keras.preprocessing.image.random_zoom(0.75, 1.25),
#         keras.preprocessing.image.random_shift(0.25, 0.25)
    ]
)


In [89]:
# plt.figure(figsize=(10, 10))
# for images, _ in next(train)[0]:
#     for i in range(9):
#         augmented_images = data_augmentation(images)
#         ax = plt.subplot(3, 3, i + 1)
#         plt.imshow(augmented_images[0].numpy().astype("uint8"))
#         plt.axis("off")

<Figure size 720x720 with 0 Axes>

ValueError: Tensor's shape (202, 3, 3) is not compatible with supplied shape [224, None, 3]

<Figure size 720x720 with 0 Axes>

In [131]:
# image = next(train)[0][0]
# plt.imshow(image)

In [159]:
next(valid)[0].shape

(64, 224, 224, 3)

In [100]:
for images in next(train)[0][:3]:
    print(data_augmentation(images).shape)
#     plt.imshow(data_augmentation(images)[0].numpy().astype("uint8"))
#     plt.show();

IndexError: list index out of range

In [161]:
def preprocess_data(x):
#     x_p = data_augmentation(x)
    x_p = keras.applications.resnet.preprocess_input(x)
    return(x_p)

In [162]:
preprocess_data(next(valid)[0])

array([[[[ 5.0609970e+00,  3.1221001e+01,  2.3199997e+00],
         [-5.9390030e+00,  2.3221001e+01, -6.6800003e+00],
         [-9.9390030e+00,  2.1221001e+01, -8.6800003e+00],
         ...,
         [-2.2939003e+01, -2.3778999e+01, -2.6800003e+00],
         [-2.1939003e+01, -2.3778999e+01, -2.6800003e+00],
         [-2.2939003e+01, -2.4778999e+01, -3.6800003e+00]],

        [[-3.9390030e+00,  2.5221001e+01, -4.6800003e+00],
         [-9.9390030e+00,  2.1221001e+01, -8.6800003e+00],
         [-1.1939003e+01,  2.1221001e+01, -8.6800003e+00],
         ...,
         [-2.1939003e+01, -2.2778999e+01, -1.6800003e+00],
         [-2.1939003e+01, -2.3778999e+01, -2.6800003e+00],
         [-2.2939003e+01, -2.4778999e+01, -3.6800003e+00]],

        [[-1.2939003e+01,  1.8221001e+01, -1.1680000e+01],
         [-1.3939003e+01,  1.9221001e+01, -1.0680000e+01],
         [-1.3939003e+01,  2.1221001e+01, -9.6800003e+00],
         ...,
         [-2.1939003e+01, -2.2778999e+01, -1.6800003e+00],
         [

In [164]:
input_shape = (224, 224, 3)
num_classes = 7
inputs = keras.Input(shape=input_shape, batch_size=16)
# x = data_augmentation(inputs)
x = keras.applications.resnet.preprocess_input(inputs)
# x = keras.preprocessing.image.random_shear(x, 45, row_axis=0, col_axis=1, channel_axis=2),
# x = keras.preprocessing.image.random_rotation(x, 45, row_axis=0, col_axis=1, channel_axis=2),
# x = keras.preprocessing.image.random_brightness(x, (0.5, 1.5)),
# x = keras.preprocessing.image.random_zoom(x, (0.75, 1.25), row_axis=0, col_axis=1, channel_axis=2),
# x = keras.preprocessing.image.random_shift(x, 0.25, 0.25, row_axis=0, col_axis=1, channel_axis=2)
x = keras.applications.ResNet50(weights="imagenet", include_top=False)(x)
x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
x = keras.layers.Flatten(name="flatten")(x)
outputs = keras.layers.Dense(num_classes, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# model.compile(
#     loss="CategoricalCrossentropy",
#     optimizer=Adam(lr=0.001),
#     metrics=[keras.metrics.CategoricalAccuracy(), keras.metrics.Recall(), keras.metrics.Precision(), tfa.metrics.F1Score(
#                     num_classes=7, 
#                     average='macro')]
# )

ResourceExhaustedError: OOM when allocating tensor with shape[3,3,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Add]

In [135]:
tf.config.list_physical_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
def plot_curves(model, loss=False):
    df = pd.DataFrame(model.history)
    if loss:
        plt.figure(figsize=(12, 6))
        plt.title("loss")
        plt.plot(df.iloc[:, 0], label="loss")
        plt.plot(df.iloc[:, 4], label="val_loss")
        plt.legend()
        plt.show()
    else:
        f1_score_train = f1(np.array(df.iloc[:, 2]), np.array(df.iloc[:, 3]))
        f1_score_valid = f1(
            np.array(df.iloc[:, 6]),
            np.array(df.iloc[:, 7]),
        )
        plt.figure(figsize=(12, 20))
        plt.subplot(4, 1, 1)
        plt.title("accuracy")
        plt.plot(df.iloc[:, 1], label="accuracy")
        plt.plot(df.iloc[:, 5], label="val_accuracy")
        plt.legend()

        plt.subplot(4, 1, 2)
        plt.title("recall")
        plt.plot(df.iloc[:, 2], label="recall")
        plt.plot(df.iloc[:, 6], label="val_recall")
        plt.legend()

        plt.subplot(4, 1, 3)
        plt.title("precision")
        plt.plot(df.iloc[:, 3], label="precision")
        plt.plot(df.iloc[:, 7], label="val_precision")
        plt.legend()

        plt.subplot(4, 1, 4)
        plt.title("f1")
        plt.plot(f1_score_train, label="f1")
        plt.plot(f1_score_valid, label="val_f1")
        plt.legend()
        plt.show();

In [None]:
prediction = model_w.predict(test) > 0.5

In [None]:
test_sub = pd.DataFrame(prediction, columns=feature_columns)
for name in test_sub.columns.tolist():
    test_sub[name] = test_sub[name].replace({True: name, False: ''})
test_sub['all'] = test_sub.apply(" ".join, axis=1).str.split().str.join(sep=' ')