In [None]:
!pip install -q efficientnet

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, BatchNormalization, MaxPooling2D, Flatten, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet, ResNet152V2
from sklearn.model_selection import train_test_split
from tensorflow.keras.regularizers import l2
from efficientnet.tfkeras import EfficientNetB7

In [None]:
# Let tf.data choose prefetch buffer sizes on its own.
AUTO = tf.data.experimental.AUTOTUNE

# TPU detection. No parameters are needed when the TPU_NAME environment
# variable is set (on Kaggle this is always the case). A ValueError raised
# here is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # Default distribution strategy in TensorFlow; works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Global batch size: 128 samples for each replica that runs in sync.
BATCH_SIZE = 128 * strategy.num_replicas_in_sync

In [None]:
# Load the training CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/plant-pathology-2020-fgvc7/train.csv",
)

In [None]:
# Load every training image named in the CSV as a 224x224 RGB array.
imageNames = df['image_id'].to_numpy()

arr = []
for imageName in imageNames:
    # The context manager closes the opened file itself, even if decoding
    # fails. Calling close() after resize() only closed the resized copy.
    with Image.open("/kaggle/input/plant-pathology-2020-fgvc7/images/" + imageName + ".jpg") as img:
        # Force three channels so every array has the same shape and the
        # list can later be stacked into a single ndarray.
        resized = img.convert("RGB").resize((224, 224))
        arr.append(np.asarray(resized))

In [None]:
# Stack the per-image arrays into one ndarray.
x_train = np.array(arr)
# Scale pixel values to [0, 1] and keep them as floats. Casting the quotient
# to an integer type truncates every value below 255 down to 0, which would
# erase almost all of the image content.
x_train = np.divide(x_train, 255).astype('float32')

In [None]:
# Target matrix: the four label columns of the training CSV, one row per image.
y_train = df.loc[
    :, ['healthy', 'multiple_diseases', 'rust', 'scab']
].to_numpy()

In [None]:
# Hold out 10% of the samples; the fixed seed makes the split repeatable.
# NOTE: this overwrites x_train / y_train, so re-running the cell splits the
# already reduced training set again.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=45,
)

In [None]:
# Per-sample shape (everything after the leading sample axis); the model below uses it as input shape.
np.shape(x_train)[1:]

In [None]:
# Training input pipeline.
# Order matters: shuffle the finite dataset first, then repeat it. The buffer
# spans the whole training set so each pass is a full reshuffle; a buffer
# smaller than the dataset only mixes nearby samples.
# `cache()` is intentionally absent: the arrays already live in memory, and
# caching after `repeat()` would try to cache an endless stream.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation input pipeline.
# No shuffling: ordering does not matter for evaluation, and a fixed order
# keeps validation metrics comparable between epochs.
# `repeat()` stays because validation is driven by a fixed number of steps;
# `cache()` is dropped because caching a repeated (endless) stream never
# completes and the arrays are already in memory.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_test, y_test))
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

In [None]:
#generator = ImageDataGenerator(rotation_range=10, zoom_range = 0.10, width_shift_range=0.1, height_shift_range=0.1).flow(x_train, y_train, batch_size=256)

In [None]:
# Build and compile inside the strategy scope so the model's variables are
# created under the selected distribution strategy.
with strategy.scope():
    # ImageNet-pretrained EfficientNetB7 without its classification top,
    # reduced to one feature vector per image by global max pooling.
    backbone = EfficientNetB7(
        include_top=False,
        weights='imagenet',
        input_shape=x_train.shape[1:],
        pooling='max',
    )
    # Four-way softmax head, one unit per label column.
    classifier_head = Dense(4, activation='softmax')

    model = Sequential([backbone, classifier_head])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.summary()

In [None]:
# Stop after 5 epochs without improvement in validation loss (the callback's
# default monitor, spelled out here) and restore the best weights seen.
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Train with `fit`: `fit_generator` is deprecated and `fit` accepts tf.data
# datasets directly. Both datasets repeat, so the step counts define where
# an epoch and a validation pass end.
history = model.fit(
    train_dataset,
    epochs=50,
    # At least one step, even when a global batch exceeds the training set.
    steps_per_epoch=max(1, y_train.shape[0] // BATCH_SIZE),
    validation_data=test_dataset,
    validation_steps=4,
    callbacks=[es],
)

In [None]:
# Score the trained model on the held-out split (reports loss and accuracy).
model.evaluate(x=x_test, y=y_test)

In [None]:
# One stacked panel per metric: (subplot position, training key, validation key, title).
curve_panels = (
    (211, 'loss', 'val_loss', 'model loss'),
    (212, 'accuracy', 'val_accuracy', 'model accuracy'),
)

for subplot_code, train_key, val_key, panel_title in curve_panels:
    plt.subplot(subplot_code)
    # Training curve first, validation curve second, matching the legend order.
    for history_key in (train_key, val_key):
        plt.plot(history.history[history_key])
    plt.title(panel_title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend([train_key, val_key], loc='upper left')

plt.show()

In [None]:
# Load the test CSV and preview its first rows.
df_test = pd.read_csv(
    filepath_or_buffer="/kaggle/input/plant-pathology-2020-fgvc7/test.csv",
)
df_test.head(n=5)

In [None]:
# Load every test image named in the CSV as a 224x224 RGB array.
imageNames = df_test['image_id'].to_numpy()

arr_pred = []
for imageName in imageNames:
    # The context manager closes the opened file itself, even if decoding
    # fails. Calling close() after resize() only closed the resized copy.
    with Image.open("/kaggle/input/plant-pathology-2020-fgvc7/images/" + imageName + ".jpg") as img:
        # Must be 224x224 like the training images: the model's input shape
        # is taken from the training array, so 128x128 inputs would be
        # rejected at prediction time.
        resized = img.convert("RGB").resize((224, 224))
        arr_pred.append(np.asarray(resized))

In [None]:
# Stack the test images and scale pixels to [0, 1] floats, mirroring the
# division by 255 applied to the training images.
x_pred = np.array(arr_pred).astype('float32') / 255

# Guard: the network has a fixed input size taken from the training array,
# so bring the test images to that height/width if they were loaded at a
# different resolution. This is a no-op when the sizes already match.
target_hw = tuple(x_train.shape[1:3])
if tuple(x_pred.shape[1:3]) != target_hw:
    x_pred = tf.image.resize(x_pred, target_hw).numpy()

# Predict on the competition test images (x_pred), not on the held-out
# validation split, then show the predicted class index of the first image.
preds = model.predict(x_pred)
print(np.argmax(preds[0]))