In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from kaggle_datasets import KaggleDatasets
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the read-only input
# directory, in the order os.walk yields them, then echo one path per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# tf.data autotune constant; passed below as `num_parallel_calls` and as the
# prefetch buffer size so tf.data picks those values itself.
AUTO = tf.data.experimental.AUTOTUNE

# Resolve the TPU cluster, connect this runtime to it, then initialize the TPU
# system. These three calls are order-dependent and all use the same resolver.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

# Distribution strategy built from the resolver; later cells use it to scope
# model creation and to derive the global batch size from its replica count.
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
# Base GCS path reported by KaggleDatasets; the train and test image paths
# are built by appending '/images/<image_id>.jpg' to it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [None]:
# Competition tables: training rows, test rows, and the submission template.
train_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/train.csv')
test_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/test.csv')
sample_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')

# Label matrix: every column from 'healthy' through the last one, as an array.
train_label = train_df.loc[:, 'healthy':].to_numpy()
# Number of full batches the training rows would yield at a batch size of 64.
print(len(train_label) // 64)

# One image path per training row, rooted at the dataset's GCS base path.
train_path = train_df['image_id'].map(
    lambda image_id: f'{GCS_DS_PATH}/images/{image_id}.jpg'
).to_numpy()

In [None]:
def decode_img(file_name, label=None):
    """Load a JPEG file as a 700x700, 3-channel float32 image divided by 255.

    Args:
        file_name: Path of the JPEG file; read with ``tf.io.read_file``.
        label: Optional label that is passed through untouched. Leave it as
            ``None`` (the default) for unlabeled data such as the test set.

    Returns:
        The processed image alone when ``label`` is ``None``; otherwise the
        tuple ``(image, label)``.
    """
    raw_bytes = tf.io.read_file(file_name)
    img = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=3), dtype=tf.float32)
    img = tf.image.resize(img, (700, 700))
    img = img / 255.

    # Identity test on purpose: in the training pipeline `label` is a tensor,
    # and `== None` would route through the tensor's overloaded equality
    # operator instead of simply asking "was a label supplied?".
    if label is None:
        return img
    return img, label

In [None]:
# Global batch size: 8 examples for each replica the strategy keeps in sync.
batch_size = 8 * tpu_strategy.num_replicas_in_sync

# Training input pipeline, one stage per statement:
# (path, label) pairs -> decoded images -> cache -> repeat -> shuffle ->
# batch -> prefetch.
train_dataset = tf.data.Dataset.from_tensor_slices((train_path, train_label))
train_dataset = train_dataset.map(decode_img, num_parallel_calls=AUTO)
train_dataset = train_dataset.cache()
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.shuffle(1024)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(AUTO)


In [None]:
!pip install -q efficientnet
from efficientnet.tfkeras import EfficientNetB7

In [None]:
with tpu_strategy.scope():
    # Model, optimizer and compile step are all created inside the strategy
    # scope so that the strategy is in effect when they are built.

    # EfficientNetB7 backbone without its classification head, loaded with
    # ImageNet weights; the input size matches the 700x700 output of decode_img.
    base_model = EfficientNetB7(include_top=False, weights='imagenet', input_shape=(700,700,3),pooling=None)
    model = Sequential()
    model.add(base_model)
    # pooling=None leaves a spatial feature map, so flatten it before the head.
    model.add(layers.Flatten())
    # Four-way softmax output, one unit per label column.
    model.add(layers.Dense(4, activation = 'softmax'))

    # Optimizer
    optimizer = tf.keras.optimizers.Adam(0.00009)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line to the cell output.
    model.summary()

In [None]:
# The training dataset repeats, so the length of an epoch is set explicitly:
# the number of whole batches that fit into the training rows.
epochs = 50
steps_per_epoch = train_label.shape[0] // batch_size
model.fit(train_dataset, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [None]:
# Write the trained model to "my_model.h5" (a relative path, so it lands in
# the current working directory).
model.save("my_model.h5")

In [None]:
# Read the test table and print its first rows as a quick sanity check.
test_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/test.csv')
print(test_df.head())

# One image path per test row, rooted at the dataset's GCS base path.
test_path = test_df['image_id'].map(
    lambda image_id: f'{GCS_DS_PATH}/images/{image_id}.jpg'
).to_numpy()

In [None]:
# Inference pipeline: paths -> decoded images (no labels) -> batches.
# There is no shuffle or repeat stage, so the file order is kept.
test_dataset = tf.data.Dataset.from_tensor_slices(test_path)
test_dataset = test_dataset.map(decode_img, num_parallel_calls=AUTO)
test_dataset = test_dataset.batch(batch_size)

# Model outputs for every test image.
preds = model.predict(test_dataset)

In [None]:
# Overwrite every column from 'healthy' onward in the submission template with
# the model predictions (mutates sample_df in place).
# NOTE(review): assumes preds has one row per template row and one column per
# label column, in the same order — confirm against sample_submission.csv.
sample_df.loc[:,'healthy':] = preds
# Print the first rows to eyeball the filled-in template.
print(sample_df.head())

In [None]:
# Write the filled-in template to submission.csv; index=False keeps the
# DataFrame index out of the file.
sample_df.to_csv('submission.csv', index=False)