# Cassava Disease Classification

## Introduction
1. This notebook is a beginner's approach to Cassava Disease Classification.
2. The current notebook uses EfficientNetB3 for transfer learning.
3. It achieves an accuracy of more than 87% on validation data and 96% on training data.

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

%config Completer.use_jedi = False

import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by the libraries used in the cells below.
warnings.filterwarnings('ignore')

# Data Loading and Splitting

In [None]:
# Load train.csv (it supplies the `image_id` and `label` columns used by the
# cells below) and preview the first rows.
train_csv_path = '../input/cassava-leaf-disease-classification/train.csv'
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
# Read the label-number -> disease-name map shipped with the dataset.
import json

label_map_path = '../input/cassava-leaf-disease-classification/label_num_to_disease_map.json'
with open(label_map_path) as label_file:
    raw_label_map = json.load(label_file)

# JSON object keys are always strings; convert them to ints.
real_labels = dict(
    (int(label_num), disease_name)
    for label_num, disease_name in raw_label_map.items()
)

real_labels

In [None]:
# Bar chart of how many rows of `df` carry each label value.
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument as `data`, so `sns.countplot(df['label'])` no longer
# reliably produces the per-class counts.
sns.countplot(x='label', data=df)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows, stratified on `label` so both parts keep the same
# class proportions. `test` is what the later cells feed to `val_gen`, i.e. it
# serves as the validation set.
# A fixed `random_state` makes the split (and therefore the reported metrics
# and the saved checkpoint) reproducible across notebook runs.
train, test = train_test_split(
    df,
    test_size=0.1,
    stratify=df['label'],
    random_state=42,
)

# Data Generator

In [None]:
from tensorflow.keras.applications.efficientnet import EfficientNetB3, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Image generator whose only transformation is the EfficientNet
# `preprocess_input` function imported above; no augmentation is configured.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Settings shared by the training and validation generators: same image
# folder, same dataframe columns, 512x512 inputs, batches of 8.
flow_options = dict(
    directory='../input/cassava-leaf-disease-classification/train_images',
    x_col='image_id',
    y_col='label',
    target_size=(512, 512),
    batch_size=8,
)

# Labels are cast to str before being handed to `flow_from_dataframe`
# (NOTE(review): per the Keras docs the default class_mode expects string
# labels - confirm against the installed version).
train['label'] = train['label'].astype(str)
train_gen = datagen.flow_from_dataframe(train, **flow_options)

# The held-out split comes from the same image folder and drives validation.
test['label'] = test['label'].astype(str)
val_gen = datagen.flow_from_dataframe(test, **flow_options)

# Modeling

In [None]:
# EfficientNetB3 backbone with ImageNet weights and without its top
# classification layers, built for 512x512 RGB inputs (matching the
# generators' target_size).
image_shape = (512, 512, 3)
base_model = EfficientNetB3(
    weights='imagenet',
    include_top=False,
    input_shape=image_shape,
)

In [None]:
from tensorflow.keras import layers,models

# Full network: the pretrained backbone followed by a small classification
# head (global average pooling -> 256-unit ReLU layer -> 40% dropout ->
# 5-way softmax output).
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(5, activation='softmax'),
])

model.summary()

In [None]:
# Training configuration: Adam optimizer with its default settings,
# categorical cross-entropy loss, and accuracy as the tracked metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# Import the callbacks from `tensorflow.keras`, the same package the model and
# generators come from; mixing in the standalone `keras` package can pull in a
# different Keras implementation than the one the model was built with.
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau

# Write the model to ./best_model.h5 whenever validation loss improves.
checkpoint = ModelCheckpoint(filepath='./best_model.h5', monitor='val_loss', save_best_only=True,verbose=1)

# Multiply the learning rate by 0.2 after 2 epochs without a validation-loss
# improvement, never going below 1e-6.
reducelr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=1e-6,
    mode='min',
    verbose=1
)

my_callbacks = [checkpoint,reducelr]

In [None]:
# Number of batches needed to see every sample once (the last batch may be
# partial, hence the ceil). `np.ceil` returns a float, while
# `steps_per_epoch` / `validation_steps` are integer arguments, so cast.
train_steps = int(np.ceil(train_gen.n / train_gen.batch_size))
val_steps = int(np.ceil(val_gen.n / val_gen.batch_size))

# Training

In [None]:
# Train for 10 epochs, validating on the held-out generator after each epoch.
# `batch_size` is intentionally not passed: `train_gen` already yields batches
# (its batch size was set when the generator was created), and the Keras docs
# say not to specify `batch_size` for generator input.
history = model.fit(
    train_gen,
    epochs = 10,
    steps_per_epoch = train_steps,
    validation_data = val_gen,
    validation_steps = val_steps,
    callbacks = my_callbacks)

In [None]:
# Save the model as it stands after the last epoch. This is a different file
# from ./best_model.h5, which the checkpoint callback writes.
final_model_path = './best_model2.h5'
model.save(final_model_path)

# Plotting results

In [None]:
import matplotlib.pyplot as plt

# One figure per metric (accuracy first, then loss): the training curve is
# drawn first and the validation curve ('val_<metric>') second, matching the
# legend order.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Submission
### Note: A separate notebook has to be created for submission

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model

# Reload the file written by the ModelCheckpoint callback and rebind `model`
# to it for inference.
best_model_path = './best_model.h5'
model = load_model(best_model_path)

In [None]:
# Load the sample submission; its `image_id` column lists the test images to
# predict. Preview the first rows.
sample_submission_path = '../input/cassava-leaf-disease-classification/sample_submission.csv'
ss = pd.read_csv(sample_submission_path)
ss.head()

In [None]:
# Folder holding the test images; the prediction loop below prepends it to
# each image file name.
test_path = '../input/cassava-leaf-disease-classification/test_images/'

In [None]:
import os

# Predicted class index for every image listed in the sample submission,
# in the same order as `ss.image_id`.
preds = []

for image_name in ss.image_id:
    # os.path.join works whether or not `test_path` ends with a slash.
    img = image.load_img(os.path.join(test_path, image_name), target_size=(512, 512))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add a leading batch axis: one-image batch
    x = preprocess_input(x)        # same preprocessing as the training generator

    # verbose=0: newer Keras versions otherwise print a progress bar for
    # every single-image call.
    prediction = model.predict(x, verbose=0)

    # Take the arg-max over the class axis of the single row and store it as
    # a plain Python int.
    preds.append(int(np.argmax(prediction, axis=1)[0]))

In [None]:
# Pair every image id with its predicted label, write submission.csv without
# the index column, and preview the result.
submission_columns = {'image_id': ss.image_id, 'label': preds}
my_submission = pd.DataFrame(submission_columns)
my_submission.to_csv('submission.csv', index=False)
my_submission.head()

# Conclusion
1. The current model achieved an accuracy of 87% on validation data and 96% on training data.
2. On the test dataset it gives an accuracy of 88%.
3. Further techniques can be applied to prevent the model from overfitting and to get better accuracy.


![](https://st3.depositphotos.com/1998651/13850/v/1600/depositphotos_138506364-stock-illustration-cup-of-coffee-with-have.jpg)
