<a href="https://colab.research.google.com/github/shawal-mbalire/ai_lab_hack/blob/main/Xception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!wget https://storage.googleapis.com/air-lab-hackathon/Maize/classification/Classification_maize.zip
!mkdir Maize_Data
!rm -rf sample_data
!cd Maize_Data && unzip ../Classification_maize.zip
!rm Classification_maize.zip

--2024-05-25 07:17:53--  https://storage.googleapis.com/air-lab-hackathon/Maize/classification/Classification_maize.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.175.207, 74.125.24.207, 142.251.10.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.175.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19414007522 (18G) [application/zip]
Saving to: ‘Classification_maize.zip’

Classification_maiz  34%[=====>              ]   6.20G  21.1MB/s    eta 9m 35s 

In [None]:
import cv2
import glob
import pandas
import tensorflow
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import Xception
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

In [None]:
# Dataset locations (populated by the download cell).
train_data_dir = 'Maize_Data/train'
validation_data_dir = 'Maize_Data/validation'

# Generator settings shared by both splits, kept in one place so they cannot drift apart.
CLASS_NAMES = ['FAW', 'Healthy', 'MLB', 'MLN', 'MSV']
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 16

# Pixel values are rescaled from [0, 255] to [0, 1]; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Training batches stay shuffled (the flow_from_directory default).
train = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
)

# shuffle=False keeps the batch order identical to `valid.classes`, so predictions from
# `model.predict(valid)` can be compared element-wise with the true labels. Shuffling has
# no benefit for validation data.
valid = valid_datagen.flow_from_directory(
    validation_data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    shuffle=False,
)


In [None]:
# Balanced class weights for the training set: rarer classes receive larger weights.
class_labels = np.unique(train.classes)
class_weight_values = compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=train.classes
)
print(class_weight_values)
# Pair every weight with its actual label rather than its position in the array, so the
# mapping stays correct even when a class index is absent from the training labels.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weight_values)
}
print(class_weights)
def lr_scheduler(epoch, lr):
    """Halve the learning rate on every fifth epoch, leaving it untouched otherwise.

    Args:
        epoch: Zero-based epoch index supplied by the scheduler callback.
        lr: Learning rate currently in use.

    Returns:
        `lr * 0.5` when `epoch` is a non-zero multiple of 5, else `lr` unchanged.
    """
    # Epoch 0 is excluded so training starts at the configured rate.
    if epoch == 0 or epoch % 5 != 0:
        return lr
    return lr * 0.5

In [None]:
# Both callbacks track the validation F1 score, so stopping and checkpointing reflect
# generalisation rather than fit to the training data. mode='max' is stated explicitly:
# Keras cannot tell from a custom metric name such as 'f1_m' that higher is better.
early_stopping = EarlyStopping(
    monitor='val_f1_m',
    mode='max',
    patience=3,
    restore_best_weights=True
)
model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    save_best_only=True,
    monitor='val_f1_m',
    mode='max'
)
# Xception convolutional base without its classification head
# (Keras loads ImageNet weights by default when `weights` is not given).
base_model = Xception(
    include_top=False,
    input_shape=(256, 256, 3)
)

# Number of output units for the softmax layer, taken from the training generator.
num_classes = len(train.class_indices)
num_classes

In [None]:
# Classifier: Xception base -> global average pooling -> small dense layer -> dropout -> softmax.
model = Sequential([
    base_model,
    tensorflow.keras.layers.GlobalAveragePooling2D(),
    Dense(10, activation='relu'),
    Dropout(0.3),
    Dense(units=num_classes, activation='softmax'),
])
model.summary()

In [None]:
def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / actual positives.

    Values are clipped to [0, 1] and rounded before counting, so predictions
    count as positive once they round to 1. `K.epsilon()` is added to the
    denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    hits = K.sum(hit_mask)
    positives = K.sum(positive_mask)
    return hits / (positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / predicted positives.

    Values are clipped to [0, 1] and rounded before counting, so predictions
    count as positive once they round to 1. `K.epsilon()` is added to the
    denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    hits = K.sum(hit_mask)
    predicted = K.sum(predicted_mask)
    return hits / (predicted + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of `precision_m` and `recall_m` for the given tensors.

    `K.epsilon()` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

In [None]:
# Fine-tuning setup: Adam with a small learning rate and categorical cross-entropy
# on the softmax probabilities (from_logits=False).
tracked_metrics = ['acc', f1_m, precision_m, recall_m]
cross_entropy = tensorflow.keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = Adam(learning_rate=1e-5)
model.compile(optimizer=optimizer, loss=cross_entropy, metrics=tracked_metrics)

# Fit with per-class weights to counter class imbalance. The callbacks provide
# early stopping, the step-wise learning-rate decay and best-model checkpointing.
training_callbacks = [early_stopping, LearningRateScheduler(lr_scheduler), model_checkpoint]
hist = model.fit(
    train,
    validation_data=valid,
    epochs=20,
    class_weight=class_weights,
    callbacks=training_callbacks,
)

In [None]:
# One figure per tracked quantity:
# (history key, figure title, training label, validation label, output file).
# The validation series is always stored under 'val_' + history key.
curve_specs = [
    ('loss', 'Loss', 'loss', 'val_loss', 'loss.png'),
    ('acc', 'Accuracy', 'accuracy', 'val_accuracy', 'accuracy.png'),
    ('f1_m', 'f1_m', 'f1_m', 'val_f1_m', 'f1_m.png'),
    ('precision_m', 'precision_m', 'precision_m', 'val_precision_m', 'precision.png'),
    ('recall_m', 'recall_m', 'recall_m', 'val_recall_m', 'recall.png'),
]

for history_key, title, train_label, valid_label, out_file in curve_specs:
    fig = plt.figure()
    plt.plot(hist.history[history_key], color='teal', label=train_label)
    plt.plot(hist.history['val_' + history_key], color='orange', label=valid_label)
    fig.suptitle(title, fontsize=20)
    plt.legend(loc="upper left")
    # Save before show() so the written PNG contains the rendered figure.
    fig.savefig(out_file, format='png')
    plt.show()

In [None]:
# Reload the checkpointed model. The custom metric functions must be supplied by name
# so Keras can resolve them while deserialising the compiled model.
custom_metric_fns = {fn.__name__: fn for fn in (f1_m, precision_m, recall_m)}
model = tensorflow.keras.saving.load_model(
    'best_model.keras',
    custom_objects=custom_metric_fns,
)

In [None]:
# Print the layer-by-layer summary of the current `model` object.
model.summary()

In [None]:
def predict_image(image_path):
    """Return the model's class probabilities for a single image file.

    The image is read with OpenCV, converted from OpenCV's BGR channel order to
    RGB (the order the Keras directory generators feed during training), resized
    to 256x256 and scaled to [0, 1] before being passed to `model.predict`.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        1-D array of probabilities, one per class, in the order
        FAW, Healthy, MLB, MLN, MSV.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    resized = tensorflow.image.resize(rgb_image, [256, 256])
    return model.predict(np.expand_dims(resized / 255, 0))[0]


# Spot-check: one training image per class.
sample_paths = [
    "Maize_Data/train/FAW/FAW_1.jpg",
    "Maize_Data/train/Healthy/Healthy_1.jpg",
    "Maize_Data/train/MLB/MLB_1.jpg",
    "Maize_Data/train/MLN/MLN_1.jpg",
    "Maize_Data/train/MSV/MSV_1.jpg",
]
for sample_path in sample_paths:
    faw, healthy, mlb, mln, msv = predict_image(sample_path)
    print(faw, healthy, mlb, mln, msv)

In [None]:
classes = ['FAW', 'Healthy', 'MLB', 'MLN', 'MSV']
label_ids = list(range(len(classes)))

# The `train` generator shuffles its batches, so its predictions would not line up with
# `train.classes` (which is in directory order). Evaluate on a non-shuffled generator
# over the same data so predictions and true labels correspond one-to-one.
train_eval = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(256, 256),
    batch_size=16,
    classes=classes,
    shuffle=False
)

Y_pred = model.predict(train_eval)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = train_eval.classes
# Fixing `labels` keeps the matrix 5x5 in class order, matching the tick labels below.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes, labels=label_ids)
f, ax = plt.subplots(figsize=(7, 7))
sns.heatmap(
    confusion_mtx,
    annot=True,
    linecolor="black",
    fmt=".0f",
    ax=ax, xticklabels=classes, yticklabels=classes
)
ax.set_xlabel("Predicted", fontsize=12)
ax.set_ylabel("True", fontsize=12)
ax.set_title("Train Confusion Matrix", fontsize=14)
plt.show()

# target_names prints class names instead of bare indices in the report.
report = classification_report(
    Y_true, Y_pred_classes, labels=label_ids, target_names=classes
)
print(report)

In [None]:
classes = ['FAW', 'Healthy', 'MLB', 'MLN', 'MSV']
label_ids = list(range(len(classes)))

# Predictions must be in the same order as the true labels. A dedicated non-shuffled
# generator guarantees that, regardless of how the `valid` generator was configured.
valid_eval = valid_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(256, 256),
    batch_size=16,
    classes=classes,
    shuffle=False
)

Y_pred = model.predict(valid_eval)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = valid_eval.classes
# Fixing `labels` keeps the matrix 5x5 in class order, matching the tick labels below.
confusion_mtx = confusion_matrix(
    Y_true,
    Y_pred_classes,
    labels=label_ids
)
f, ax = plt.subplots(figsize=(7, 7))
sns.heatmap(
    confusion_mtx,
    annot=True,
    linecolor="black",
    fmt=".0f",
    ax=ax, xticklabels=classes, yticklabels=classes
)
ax.set_xlabel("Predicted", fontsize=12)
ax.set_ylabel("True", fontsize=12)
ax.set_title("Valid Confusion Matrix", fontsize=14)
plt.show()

# target_names prints class names instead of bare indices in the report.
report = classification_report(
    Y_true, Y_pred_classes, labels=label_ids, target_names=classes
)
print(report)

In [None]:
# Score every test image and build one row of class probabilities per file.
path = 'Maize_Data/test'
# Sorted so the row order is deterministic across runs (glob order is arbitrary).
files = sorted(glob.glob(path + "/*.jpg"))

# Images are predicted in small batches: far fewer `model.predict` calls than one per
# image, without holding the whole test set in memory at once.
predict_batch_size = 16
records = []
for start in range(0, len(files), predict_batch_size):
    batch_files = files[start:start + predict_batch_size]
    batch_images = []
    for file_path in batch_files:
        bgr_image = cv2.imread(file_path)
        if bgr_image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {file_path}")
        # OpenCV loads BGR; the Keras generators used for training feed RGB.
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        batch_images.append(tensorflow.image.resize(rgb_image, [256, 256]) / 255)

    batch_probs = model.predict(tensorflow.stack(batch_images), verbose=0)
    for file_path, (faw, healthy, mlb, mln, msv) in zip(batch_files, batch_probs):
        file_name = file_path.split('/')[-1]
        records.append([file_name, faw, healthy, mlb, mln, msv])

# Build the frame once from the collected rows instead of concatenating inside the loop.
df = pandas.DataFrame(
    records,
    columns=['filename', 'faw', 'healthy', 'mlb', 'mln', 'msv']
)

df

In [None]:
# Write the prediction table to CSV without the DataFrame index column.
predictions_csv = '18446744073709551616.csv'
df.to_csv(predictions_csv, index=False)

In [None]:
test_data_dir = 'Maize_Data/test'

test_datagen = ImageDataGenerator(rescale=1./255)
# Use the generator created for the test split (previously `valid_datagen` was used by
# mistake). shuffle=False keeps the batch order aligned with `test.filenames`, which is
# required to match predictions back to files.
# NOTE(review): flow_from_directory only picks up images inside the listed class
# sub-folders, while the prediction cell reads test images from 'Maize_Data/test/*.jpg'.
# If the test folder is flat, this generator will find 0 images - confirm the layout.
test = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(256, 256),
    batch_size=16,
    classes=['FAW', 'Healthy', 'MLB', 'MLN', 'MSV'],
    shuffle=False
)