In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import tensorflow as tf
import tensorflow_hub as hub
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# File names of the test images. os.listdir makes no promise about ordering,
# so sort to keep the preview image and the submission row order reproducible.
image_names=sorted(os.listdir('../input/cassava-leaf-disease-classification/test_images'))


In [None]:
# Dataset directories. The trailing slash matters: file names are appended
# with plain string concatenation below and when building train paths.
# test_path now names the directory the test file names were listed from,
# instead of a Google Drive train folder.
test_path='../input/cassava-leaf-disease-classification/test_images/'
train_path='../input/cassava-leaf-disease-classification/train_images/'
arr=image_names
path=test_path
# Full path of every test image, in the same order as `arr`.
image_add=[test_path+fname for fname in arr]

In [None]:
# Preview the first test image; as the cell's last expression the Image object
# is what gets displayed.
from IPython.display import Image
Image(image_add[0])


In [None]:
# Side length, in pixels, that every image is resized to.
IMG_SIZE = 224

def process_image(image_path, img_size = IMG_SIZE):
  """
  Load the JPEG at `image_path` and return it as a float32 image Tensor.

  The file is decoded with 3 colour channels, converted to float32
  (which rescales 0-255 values into the 0-1 range) and resized to
  `img_size` x `img_size`.
  """
  raw_bytes = tf.io.read_file(image_path)
  rgb = tf.image.decode_jpeg(raw_bytes, channels = 3)
  rgb_float = tf.image.convert_image_dtype(rgb, tf.float32)
  return tf.image.resize(rgb_float, size = [img_size, img_size])

In [None]:
def get_image_label(image_path, label):
  """
  Pair a processed image with its label.

  Runs `process_image` on `image_path` and returns the tuple
  (image, label); the label is passed through untouched.
  """
  return process_image(image_path), label

In [None]:
# Default number of samples per batch.
BATCH_SIZE = 32

def create_data_batches(X , y = None, batch_size = BATCH_SIZE, valid_data = False, test_data = False):
  """
  Turn image paths (X) and optional labels (y) into a batched tf.data.Dataset.

  * test_data=True  -> paths only, mapped through `process_image`, not shuffled.
  * valid_data=True -> (path, label) pairs mapped through `get_image_label`, not shuffled.
  * otherwise       -> training data: pairs are shuffled with a buffer as large
                       as X, then mapped through `get_image_label`.

  `test_data` takes precedence over `valid_data` when both are set.
  """
  # Unlabelled test images.
  if test_data:
    print('Creating test data batches........')
    paths_only = tf.data.Dataset.from_tensor_slices((tf.constant(X)))
    return paths_only.map(process_image).batch(batch_size)

  # Labelled validation images: keep the given order.
  if valid_data:
    print('Creating valid data batches...........')
    pairs = tf.data.Dataset.from_tensor_slices((tf.constant(X),tf.constant(y)))
    return pairs.map(get_image_label).batch(batch_size)

  # Labelled training images: shuffle the (path, label) pairs before decoding.
  print('Creating training data batches...............')
  pairs = tf.data.Dataset.from_tensor_slices((tf.constant(X),tf.constant(y)))
  shuffled_pairs = pairs.shuffle(buffer_size = len(X))
  return shuffled_pairs.map(get_image_label).batch(batch_size)

In [None]:
# Load the training metadata and attach the full path of every training image.
df=pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
df['image_read'] = train_path + df['image_id']

# Replace the integer class codes with readable disease names in one pass.
# (The previous per-row `df['label'][i] = ...` was chained assignment: slow,
# warning-prone and silently ineffective under pandas copy-on-write.)
# Any code other than 0-3 maps to 'Healthy', as before.
LABEL_NAMES = {
    0: 'Cassava Bacterial Blight (CBB)',
    1: 'Cassava Brown Streak Disease (CBSD)',
    2: 'Cassava Green Mottle (CGM)',
    3: 'Cassava Mosaic Disease (CMD)',
}
df['label'] = df['label'].map(lambda code: LABEL_NAMES.get(code, 'Healthy'))

labels = df['label'].to_numpy()
# np.unique returns the distinct names in sorted order.
unique_diseases = np.unique(labels)
# One boolean vector per image: True at the position of its disease name.
boolean_labels = [label == unique_diseases for label in labels]


In [None]:
# How many leading samples are used for the train/validation split below.
# The trailing `#@param` comment is a Colab-style form annotation.
NUM_IMAGES = 1000 #@param{type:'slider', min:1000, max:10000, step:1000}
# Features: full image paths. Targets: one boolean vector per image.
X = df['image_read']
y = boolean_labels

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the first NUM_IMAGES samples for validation; the fixed
# random_state keeps the split the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X[:NUM_IMAGES],
    y[:NUM_IMAGES],
    test_size = 0.2,
    random_state = 9,
)
# Show the four split sizes.
tuple(len(part) for part in (X_train, X_val, y_train, y_val))

In [None]:
# Batch the split data: training batches are shuffled, validation batches are not.
train_data = create_data_batches(X_train,y_train)
valid_data = create_data_batches(X_val,y_val, valid_data=True)

# The model is deliberately not built here: `create_models` is defined in a
# later cell, so calling it at this point fails on a fresh kernel, and
# `model` is assigned by `train_model()` afterwards anyway.

In [None]:
# Shape of one input image: height, width, colour channels (no batch axis).
INPUT_SHAPE = [224,224,3]

# Number of classes = length of one boolean label vector.
OUTPUT_SHAPE = len(y[0])

MODEL_URL = 'https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/4'
def create_models(input_shape = INPUT_SHAPE, output_shape = OUTPUT_SHAPE, model_url = MODEL_URL):

  """
  Builds and compiles (but does not train) a convolutional image classifier.

  The network is five Conv2D -> MaxPooling2D -> Dropout stages followed by a
  1024-unit dense layer and a softmax output layer.

  Args:
    input_shape: [height, width, channels] of a single image, without the
      batch axis.
    output_shape: number of classes, i.e. units of the final softmax layer.
    model_url: only echoed in the log line below; the layers are defined here
      and nothing is loaded from this URL. Kept so existing callers still work.

  Returns:
    The compiled tf.keras Sequential model.
  """

  print('Building a model with :',model_url)

  layers = tf.keras.layers

  # Setup the model layers
  model = tf.keras.models.Sequential([
    # Honour the caller's input_shape instead of the module-level constant.
    layers.Conv2D(64, (3,3), activation = 'relu', padding = 'same', input_shape = input_shape),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    layers.Conv2D(128, (3,3), activation = 'relu', padding = 'same'),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    layers.Conv2D(128, (3,3), activation = 'relu', padding = 'same'),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    layers.Conv2D(128, (3,3), activation = 'relu', padding = 'same'),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    layers.Conv2D(256, (3,3), activation = 'relu', padding = 'same'),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(1024,activation = 'relu'),
    # One unit per class rather than a hard-coded 5.
    layers.Dense(output_shape, activation = 'softmax')
  ])
  # Compile the model
  model.compile(
      loss = tf.keras.losses.CategoricalCrossentropy(),
      optimizer = tf.keras.optimizers.Adam(),
      metrics = ['accuracy']
  )

  # Build the model. `build` expects the batch axis as well, so prepend None.
  model.build([None] + list(input_shape))

  return model
# Stop training once validation accuracy has not improved for 3 epochs in a row.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', patience=3)
def train_model():
  """
  Creates a fresh model with `create_models` and fits it.

  Reads the module-level `train_data`, `valid_data` and `NUM_EPOCH` at call
  time, so those must be defined before this function is called.

  Returns:
    The fitted model.
  """
  model = create_models()

  # `callbacks` is documented as a list of callback instances.
  model.fit(x = train_data, epochs = NUM_EPOCH,
            validation_data = valid_data, validation_freq = 1,
            callbacks = [early_stopping])

  return model


In [None]:
# Upper bound on training epochs (early stopping may end training sooner).
NUM_EPOCH = 30 #@param {type:'slider', min:10, max:100, step:10}
model = train_model()

# Test batches and predictions are produced in the cells below. The former
# `np.argmax(p, axis=1)` here read `p` before any cell had defined it, which
# raises NameError on a fresh kernel.

In [None]:
# Batch the test image paths (no labels) and predict on them.
inp=create_data_batches(image_add,test_data=True)

p=model.predict(inp)

# Also predict on the first 10 training images.
# NOTE(review): this reassigns `p`, so the test-set predictions computed above
# are discarded at this point.
Xb=create_data_batches(X[:10],test_data=True)
p=model.predict(Xb)

In [None]:
def load_model(model_path):
  """
  Restore a saved Keras model from `model_path` and return it.

  `hub.KerasLayer` is passed as a custom object under the name 'KerasLayer'
  so that saved models containing that layer can be deserialised.
  """
  print(f"Loading saved model from: {model_path}")
  custom_layers = {'KerasLayer': hub.KerasLayer}
  return tf.keras.models.load_model(model_path, custom_objects=custom_layers)

# NOTE(review): `loaded_full_model` is not referenced by the prediction cells
# below, which call `model.predict`; confirm which model should produce the submission.
loaded_full_model = load_model('../input/model-file/20210216-110450-full-image-set-model.h5')


In [None]:
# Per-class scores for the test batches in `inp`, from the `model` returned by train_model().
pred=model.predict(inp)

In [None]:
# Index of the highest-scoring class for each test image. Class positions follow
# `unique_diseases` (sorted names), whose order coincides with the original codes 0-4.
final_pred=np.argmax(pred,axis=1)

In [None]:
# Where the submission file is written.
OUTPUT_DIR = './'
# The sample submission is read only for its column layout.
sub=pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
# One row per test image: file name plus predicted class index.
subs=pd.DataFrame({'image_id': arr, 'label': final_pred}, columns=sub.columns)
subs.to_csv(OUTPUT_DIR+'submission.csv', index=False)

In [None]:
# Preview the first rows of the submission frame.
subs.head()