## Install required packages

In [None]:
# Colab cell 1
# Install the packages the later cells rely on: the Kaggle CLI (dataset
# download), TensorFlow (data generators, model, training) and Matplotlib
# (training curves). `-q` keeps pip's output short.
!pip install -q kaggle tensorflow matplotlib


## Upload kaggle.json (Kaggle API token)

In [None]:
# Colab cell 2 - run this and upload kaggle.json when prompted
from google.colab import files
uploaded = files.upload()  # choose kaggle.json from your machine

# move to correct place and set permissions
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# confirm kaggle works
!kaggle --version


## Download + unzip the PlantVillage dataset from Kaggle

In [None]:
# Colab cell 3 - download & unzip
# Download dataset (slug: abdallahalidev/plantvillage-dataset)
!kaggle datasets download -d abdallahalidev/plantvillage-dataset -p /content/

# Unzip any downloaded zip(s) into /content/plant_dataset
!mkdir -p /content/plant_dataset
!unzip -q /content/*.zip -d /content/plant_dataset

# Show top-level files/folders found
!ls -la /content/plant_dataset | sed -n '1,120p'


## Create a small sample dataset (3 largest classes) — quick training

In [None]:
# Colab cell 4 - create small subset (3 classes)
import os, shutil, random
from glob import glob
# Fixed seed so the shuffle used for the train/valid split draws the same
# random sequence on every run.
random.seed(42)

# ORIG_ROOT: directory the full dataset was unzipped into.
# SMALL_ROOT: directory that will receive the reduced train/valid subset;
# created up front (no error if it already exists).
ORIG_ROOT = '/content/plant_dataset'
SMALL_ROOT = '/content/plant_dataset_small'
os.makedirs(SMALL_ROOT, exist_ok=True)

# Find directories that contain images
def find_image_dirs(root, min_images=10, extensions=('.jpg', '.jpeg', '.png')):
    """Return the sorted paths of directories under ``root`` that look like class folders.

    A directory qualifies when it directly contains strictly more than
    ``min_images`` files whose name ends with one of ``extensions``
    (compared case-insensitively). Sub-directories are examined
    independently; their files do not count towards the parent.

    Args:
        root: Directory tree to walk. A non-existent path yields ``[]``.
        min_images: A directory must hold more than this many images to be
            reported. The default of 10 filters out folders that only hold
            a handful of stray pictures.
        extensions: Iterable of file-name suffixes treated as images.

    Returns:
        A lexicographically sorted list of directory paths.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    image_dirs = []
    for dirpath, _dirnames, filenames in os.walk(root):
        # Single pass over the file names: count the images once instead of
        # testing for "any image" and then counting again.
        n_images = sum(1 for fname in filenames if fname.lower().endswith(suffixes))
        if n_images > min_images:
            image_dirs.append(dirpath)
    return sorted(image_dirs)


image_dirs = find_image_dirs(ORIG_ROOT)
print(f"Found {len(image_dirs)} possible image dirs. Sample: {image_dirs[:5]}")

# compute counts
# Build (directory, image_count) pairs ranked from most to fewest images.
# The count is computed inline so that no module-level variable named `files`
# is created; that name is the google.colab `files` module imported in cell 2
# and rebinding it to a list would shadow the module.
counts = sorted(
    (
        (d, sum(1 for name in os.listdir(d) if name.lower().endswith(('.jpg', '.jpeg', '.png'))))
        for d in image_dirs
    ),
    key=lambda pair: pair[1],
    reverse=True,
)

# pick top N classes (change N to your liking)
N = 3

# The split step names each class by the directory's base name, so two
# directories sharing a base name (e.g. the same class stored under different
# parent folders) would be merged into one class and yield fewer than N
# classes. Walk the ranking and keep only the largest directory for each
# distinct base name.
chosen = []
seen_names = set()
for path, count in counts:
    if len(chosen) >= N:
        break
    name = os.path.basename(path)
    if name in seen_names:
        continue
    seen_names.add(name)
    chosen.append((path, count))

if len(chosen) < N:
    print(f"Warning: only {len(chosen)} distinct classes found (requested {N}).")

print("Chosen classes (path, count):")
for p, c in chosen:
    print(os.path.basename(p), c)

# create train/valid splits and copy files
train_ratio = 0.8
for class_path, cnt in chosen:
    class_name = os.path.basename(class_path)
    src_files = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg','.jpeg','.png'))]
    random.shuffle(src_files)
    split = int(train_ratio * len(src_files))
    train_files = src_files[:split]
    valid_files = src_files[split:]

    train_dst = os.path.join(SMALL_ROOT, 'train', class_name)
    valid_dst = os.path.join(SMALL_ROOT, 'valid', class_name)
    os.makedirs(train_dst, exist_ok=True)
    os.makedirs(valid_dst, exist_ok=True)

    for fname in train_files:
        shutil.copy(os.path.join(class_path, fname), os.path.join(train_dst, fname))
    for fname in valid_files:
        shutil.copy(os.path.join(class_path, fname), os.path.join(valid_dst, fname))

print("Small dataset created at:", SMALL_ROOT)
!find /content/plant_dataset_small -maxdepth 2 -type d -print

## Data generators (uses MobileNetV2 preprocess_input)

In [None]:
# Colab cell 5 - data generators
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Locations of the train/valid subset and the input geometry fed to the network.
train_dir = '/content/plant_dataset_small/train'
valid_dir = '/content/plant_dataset_small/valid'
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Training images: MobileNetV2 preprocessing plus light augmentation
# (rotation, horizontal flip, zoom).
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    horizontal_flip=True,
    zoom_range=0.15,
)

# Validation images: preprocessing only, no augmentation.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Both generators read one sub-folder per class with identical settings.
flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
train_gen = train_datagen.flow_from_directory(train_dir, **flow_options)
valid_gen = valid_datagen.flow_from_directory(valid_dir, **flow_options)

## Build the model (MobileNetV2 transfer learning)

In [None]:
# Names used by this cell that no earlier cell provides.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2

# Check number of classes in train_gen
num_classes = train_gen.num_classes
print("Number of classes:", num_classes)

# Pre-trained MobileNetV2 backbone without its ImageNet classification head.
# It is frozen here so that only the new head below is trained; the optional
# fine-tuning cell unfreezes part of it later.
base_model = MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Classification head sized to the number of classes found on disk.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')  # <- use num_classes
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


## Train (fast: small number of epochs)

In [None]:
# Colab cell 7 - train
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Write the model to best_model.h5, tracking val_accuracy and keeping only the best.
checkpoint_cb = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
# Stop early on val_loss with a patience of 5 epochs, restoring the best weights.
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
callbacks = [checkpoint_cb, early_stop_cb]

EPOCHS = 8
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=valid_gen,
    callbacks=callbacks,
)

## Plot accuracy & loss (Matplotlib)

In [None]:
# Colab cell 8 - plots
import matplotlib.pyplot as plt

# One figure per metric, each overlaying the training curve and the
# validation curve recorded in `history`.
curves = (
    ('Accuracy', ('accuracy', 'train_acc'), ('val_accuracy', 'val_acc')),
    ('Loss', ('loss', 'train_loss'), ('val_loss', 'val_loss')),
)
for title, *series in curves:
    plt.figure()
    for history_key, legend_label in series:
        plt.plot(history.history[history_key], label=legend_label)
    plt.title(title)
    plt.legend()
    plt.show()


## Save the trained model / load the best checkpoint

In [None]:
# Colab cell 9 - save/load
# Save the model object currently in memory to a single file. This is
# separate from 'best_model.h5', which the ModelCheckpoint callback writes
# during training.
model.save('plantshieldnet_mobilenet_full.h5')       # full model
# To load later (example loads the checkpoint written during training):
# from tensorflow.keras.models import load_model
# model = load_model('best_model.h5')


## Inference on a new image (upload & predict)

In [None]:
# Colab cell 10 - test on your image
from google.colab import files
from tensorflow.keras.preprocessing import image
import numpy as np

# Upload an image from your computer
uploaded = files.upload()
if not uploaded:
    # Cancelling the upload dialog returns an empty dict; fail with a clear
    # message instead of an IndexError on the lookup below.
    raise ValueError("No image was uploaded; nothing to predict.")
img_path = next(iter(uploaded))  # name of the first uploaded file

# Load at the same size the network was trained on and add a batch axis.
img = image.load_img(img_path, target_size=IMG_SIZE)
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
# apply MobileNetV2 preprocess
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
x = preprocess_input(x)

# Predict and translate the arg-max index back to its class (folder) name.
pred = model.predict(x)[0]
pred_idx = int(np.argmax(pred))
inv_map = {v:k for k,v in train_gen.class_indices.items()}
print("Predicted class:", inv_map[pred_idx])
print("Confidence:", float(np.max(pred)))


In [None]:
# Colab cell 11 - fine-tuning (optional)
import tensorflow as tf  # `tf` is used below and is not imported by any earlier cell

# Unfreeze the backbone, then re-freeze its first `fine_tune_at` layers so
# only the later layers (and the classification head) are updated.
base_model.trainable = True
# Fine-tune from this layer onwards (tweak)
fine_tune_at = 100
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile so the changed `trainable` flags take effect; the learning rate
# is set to 1e-5 for the fine-tuning phase.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss='categorical_crossentropy', metrics=['accuracy'])

history_fine = model.fit(train_gen, validation_data=valid_gen, epochs=5, callbacks=callbacks)
