<a href="https://colab.research.google.com/github/tgngenuka/Artificial-Intelligence-Bootcamp-TechCrush/blob/main/faw_capstone_project_(312_datasets).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 0: Setup and Environment Check
This section imports necessary libraries and sets core parameters

In [13]:
# install/import necessary libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import zipfile
from tensorflow.keras.models import load_model # Used for loading the best model


# Define core parameters
IMG_WIDTH = 224 # Standard size for MobileNetV2
IMG_HEIGHT = 224
BATCH_SIZE = 32
RANDOM_SEED = 42
EPOCHS = 15 # Increased to 15 to give the model more time to learn the 3 classes

# Set a seed for reproducibility
tf.random.set_seed(RANDOM_SEED)

Step 1: Data Retrieval and Unzipping

In [14]:
# 1. Clone the GitHub repository
print("Cloning the GitHub repository...")
!git clone https://github.com/mantle-bearer/FAW-Detection-Capstone

# Define paths based on the cloned repository structure
REPO_FOLDER = "FAW-Detection-Capstone"
ZIP_FILE_NAME = "fall armyworm identification.zip" # <-- CHECK THIS EXACT NAME!
DATA_ROOT_DIR = "fall armyworm identification" # Name of the folder where the data will be extracted

ZIP_FILE_PATH = os.path.join(REPO_FOLDER, ZIP_FILE_NAME)

# 2. Unzip the dataset
if os.path.exists(ZIP_FILE_PATH):
    print(f"Unzipping {ZIP_FILE_NAME}...")
    with zipfile.ZipFile(ZIP_FILE_PATH, 'r') as zip_ref:
        # Extracts to a folder named 'fa_detection_data' which should contain 'train' and 'validation'
        zip_ref.extractall(DATA_ROOT_DIR)
    print("Unzipping complete.")
else:
    print(f"Error: Zip file not found at {ZIP_FILE_PATH}. Please check the file path inside your GitHub repo.")

Cloning the GitHub repository...
fatal: destination path 'FAW-Detection-Capstone' already exists and is not an empty directory.
Unzipping fall armyworm identification.zip...
Unzipping complete.


Step 2: Load Data and Create Datasets

In [15]:
# Assume your data is now structured as:
# fall armyworm identification/
# ├── fall armyworm identification/
# │   ├── train/
# │   │   ├── image1.jpg
# │   │   ├── image2.webp
# │   │   └── _classes.csv
# │   └── valid/
# │       ├── image3.jpg
# │       ├── image4.webp
# │       └── _classes.csv

# Function to get list of image files
def get_image_files(directory):
    image_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
                image_files.append(os.path.join(root, file))
    return image_files

TRAIN_DIR = os.path.join(DATA_ROOT_DIR, 'fall armyworm identification', 'train')
VALIDATION_DIR = os.path.join(DATA_ROOT_DIR, 'fall armyworm identification', 'valid')

# Get list of image files
train_image_files = get_image_files(TRAIN_DIR)
val_image_files = get_image_files(VALIDATION_DIR)

# Print the number of files found to verify
print(f"Found {len(train_image_files)} training image files.")
print(f"Found {len(val_image_files)} validation image files.")


# Example (assuming class name is the first part of the filename before '_'):
def get_label_from_filename(filename):
    # This is a placeholder - replace with your actual logic
    class_name = os.path.basename(filename).split('_')[0]
    return class_name

# Create datasets from file paths
train_labels = [get_label_from_filename(f) for f in train_image_files]
val_labels = [get_label_from_filename(f) for f in val_image_files]

# Get unique class names
CLASS_NAMES = sorted(list(set(train_labels + val_labels)))
NUM_CLASSES = len(CLASS_NAMES)
print(f"Detected Classes: {CLASS_NAMES}")

# Create TensorFlow Datasets (you'll need to implement the loading and decoding)
# This is a placeholder and needs to be replaced with actual image loading and label encoding
train_ds = tf.data.Dataset.from_tensor_slices((train_image_files, train_labels))
val_ds = tf.data.Dataset.from_tensor_slices((val_image_files, val_labels))

# You will need to add mapping functions to load images and convert labels to categorical
# Example (needs to be implemented):
# def load_image_and_label(image_path, label):
#     # Load image, decode, resize, etc.
#     # Convert label to one-hot encoding
#     return image, one_hot_label

# train_ds = train_ds.map(load_image_and_label)
# val_ds = val_ds.map(load_image_and_label)

# Add batching and prefetching
# train_ds = train_ds.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
# val_ds = val_ds.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

Found 312 training image files.
Found 32 validation image files.
Detected Classes: ['', '0314F1-3241a-1540x800', '0d1ed5d9-a179-4b84-9126-5b95d37279a3-AP19078163984841', '1', '1263061', '1599418', '1599890', '1600102', '1673029-Fall-Armyworm-e1497986879311', '200522-Fall-armyworm-pic-002-in-maize-scaled', '20180710-fall-armyworm', '20200224', '20220801', '210427-Fall-armyworm-pic-001', '29810', '360', '3939-15farmyworm', '3f50a063-af57-48cf-a23a-5f761a344cfe-large16x9', '40538', '4387031', '4387058', '48045734022', '49107', '5110009', '5361210-Fall-Armyworm-e1497986850347', '5485992', '5caaa99b50', '61348de63d0ab-image', 'AdobeStock', 'AgronomyNews', 'Army', 'Armyworm-1024x698-1', 'Armyworms', 'Armyworms-continue-their-relentless-attacks-on-crops-worldwide-', 'FAO', 'FAW', 'FAW-TN-1-', 'FAW-TN-2-', 'FAW-in-late-corn-3', 'FAW-larva-in-KRS-corn-1-4-2020', 'FAW2', 'FAWormsDenudedAlfalfaHay', 'Fall-Armyworm-larva', 'Fall-Armyworm-larva-JFD', 'Fall-army-worm', 'Fall-armyworm-1', 'Fall-armyw

In [16]:
print(tf.__version__)

2.19.0


In [17]:
# Inspect the extracted directory to understand the structure
print(f"Contents of {DATA_ROOT_DIR}:")
!ls -R "{DATA_ROOT_DIR}"

Contents of fall armyworm identification:
'fall armyworm identification':
'fall armyworm identification'

'fall armyworm identification/fall armyworm identification':
README.dataset.txt  README.roboflow.txt  test  train  valid

'fall armyworm identification/fall armyworm identification/test':
309911923_1-6-555x370_jpg.rf.7c63d158a18e77cc53518ad49b031aa6.jpg
8_fawheadcapsule_jpg.rf.46ccbf7fda7ff989fb99c1c858d63d20.jpg
armyworm-1_jpg.rf.53d03dec32236aba85cd447d72effb5b.jpg
armyworm-3_jpg.rf.42ac795eb3b1f2845789ba1238a7a621.jpg
Armyworm-on-corn-1024x535_jpg.rf.c19efd7ad5b0f67da491579ce73f6d5a.jpg
blog-2_webp.rf.df660ca156e9adc40e93bd19571c3a31.jpg
_classes.csv
ezgif-3-4d201179e1_webp.rf.daa3f3a768d8e644164537e76c217ba0.jpg
ezgif-7-98e0c5a943_jpg.rf.9b023ed68e14609779d725ad22931141.jpg
Fall-armyworm-identification-colors_jpg.rf.120ccddef8c67a57904d8e3fc11d0818.jpg
fall_armyworm_russ_ottens_ipmimages_jpg.rf.4b4eb0b2a04be7bef17f71cb5a7cfd1b.jpg
FAW-damage-to-sod-farm-1024x768_jpg.rf.06d353bf

2. Model Building (Modified Output Layer)

In [19]:
import tensorflow as tf # Added import as a failsafe

# Define core parameters (Added as failsafe - should be defined in the first cell)
IMG_WIDTH = 224 # Standard size for MobileNetV2
IMG_HEIGHT = 224

# Load the MobileNetV2 base model
base_model = tf.keras.applications.MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                                               include_top=False, # Exclude the classification layer
                                               weights='imagenet')

# Freeze the base model to prevent the weights from being updated during training
base_model.trainable = False


# 3. Create the Keras Sequential Model with a custom classification head
model = tf.keras.Sequential([
    tf.keras.layers.Lambda(tf.keras.applications.mobilenet_v2.preprocess_input,
                           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)), # Wrap preprocess_input in a Lambda layer
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    # Output Layer: 1 unit with sigmoid activation for Binary Classification
    tf.keras.layers.Dense(1, activation='sigmoid') # <<< KEY CHANGE: 1 unit and 'sigmoid'
])

print("\n--- Model Summary ---")
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step

--- Model Summary ---


  super().__init__(**kwargs)


3. Compile, Train, and Evaluate (Modified Compilation)

In [None]:
import tensorflow as tf # Added import as a failsafe

# Define core parameters (Added as failsafe - should be defined in the first cell)
IMG_WIDTH = 224 # Standard size for MobileNetV2
IMG_HEIGHT = 224

# Define metrics required by the project
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# 1. Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy', # <<< KEY CHANGE: Use 'binary_crossentropy'
    metrics=METRICS
)

# ... (The rest of the training and evaluation code remains the same) ...