In [None]:
!pip install tensorflow scikit-learn

import tensorflow as tf
import numpy as np
import os
from collections import Counter
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, Sequential as Seq
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from google.colab import drive
# Mount Google Drive so the dataset folders referenced below are reachable.
drive.mount('/content/drive')

# ---------------------------
# PATHS
# ---------------------------
# The three splits live side by side under one dataset root on Drive.
DATASET_ROOT = "/content/drive/MyDrive/database"
train_dir, val_dir, test_dir = (
    f"{DATASET_ROOT}/{split}" for split in ("train", "val", "test")
)

# ---------------------------
# LOAD DATASETS
# ---------------------------
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Training data keeps the loader's default shuffling so each epoch sees a
# different sample order.
train_data = tf.keras.utils.image_dataset_from_directory(
    train_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE
)
# Validation and test data are read in a fixed order. With the default
# shuffle=True the dataset is reshuffled every time it is iterated, so labels
# collected in one pass would not line up with predictions made in another
# pass (which silently corrupts any per-class report built that way).
val_data = tf.keras.utils.image_dataset_from_directory(
    val_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE, shuffle=False
)
test_data = tf.keras.utils.image_dataset_from_directory(
    test_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE, shuffle=False
)

# Read the class list once, directly from the loader's result, and derive the
# size of the output layer from it.
class_names = train_data.class_names
num_classes = len(class_names)
print("Classes:", class_names)

# ---------------------------
# NORMALIZATION
# ---------------------------
def _scale_to_unit_range(images, labels):
    """Divide the pixel values by 255 and pass the labels through unchanged."""
    return images / 255.0, labels


# NOTE(review): Keras documents `mobilenet_v2.preprocess_input` (scaling to
# [-1, 1]) as the preprocessing MobileNetV2 expects, while this pipeline only
# divides by 255. Any checkpoint already trained on this pipeline depends on
# the current scaling, so confirm before changing it.
train_data = train_data.map(_scale_to_unit_range)
val_data = val_data.map(_scale_to_unit_range)
test_data = test_data.map(_scale_to_unit_range)

AUTOTUNE = tf.data.AUTOTUNE
# NOTE(review): the datasets are already batched when they reach this point,
# so shuffle(1000) buffers up to 1000 whole batches rather than 1000 images -
# confirm the memory cost is intended.
train_data = train_data.shuffle(1000).prefetch(AUTOTUNE)
# Validation and test data are only prefetched; their order is not touched here.
val_data = val_data.prefetch(AUTOTUNE)
test_data = test_data.prefetch(AUTOTUNE)

# ---------------------------
# CLASS WEIGHTS
# ---------------------------
def _count_files_per_class(root_dir):
    """Count the entries in every class sub-directory of *root_dir*.

    The returned Counter is keyed by the integer label index. Labels are
    assigned by position in the alphanumerically sorted list of
    sub-directories, which is how `image_dataset_from_directory` infers them,
    so the keys do not depend on the folder names being numeric. Hidden
    directories (for example `.ipynb_checkpoints`) are skipped.
    NOTE(review): assumes the installed Keras version also skips hidden
    directories when inferring classes - confirm if such folders exist.
    """
    class_dirs = sorted(
        name
        for name in os.listdir(root_dir)
        if not name.startswith(".")
        and os.path.isdir(os.path.join(root_dir, name))
    )
    per_class = Counter()
    for label_index, name in enumerate(class_dirs):
        per_class[label_index] = len(os.listdir(os.path.join(root_dir, name)))
    return per_class


counter = _count_files_per_class(train_dir)

print("Class counts:", counter)

classes = np.array(list(counter.keys()))
counts = np.array(list(counter.values()))
total = counts.sum()
# "Balanced" weighting: total / (n_classes * class_count), so rare classes
# contribute as much to the loss as frequent ones. np.maximum keeps an empty
# class directory from producing a division by zero (infinite weight).
class_weights = total / (len(classes) * np.maximum(counts, 1))
# Plain Python int/float keep the mapping independent of NumPy scalar types
# when it is handed to `fit(class_weight=...)` and when it is printed.
class_weights_dict = {
    int(label): float(weight) for label, weight in zip(classes, class_weights)
}

print("Class weights:", class_weights_dict)

# ---------------------------
# DATA AUGMENTATION
# ---------------------------
# Random flips, rotations and zooms applied to training batches only.
data_augmentation = Seq([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.1)
])


def _augment(images, labels):
    """Run the augmentation layers in training mode; labels pass through."""
    return data_augmentation(images, training=True), labels


# The augmentation map is the last stage of the input pipeline, so it gets its
# own parallelism and a trailing prefetch; otherwise it would run serially in
# front of every training step instead of overlapping with it.
train_data_aug = (
    train_data
    .map(_augment, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# ---------------------------
# MODEL BUILD
# ---------------------------
# Backbone: ImageNet-pretrained MobileNetV2 without its classification head.
# It starts frozen so that only the new head is trained at first.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Classification head stacked on the backbone: pooled features -> hidden
# layer -> dropout -> one softmax unit per class.
classifier_stack = [
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
]
model = Sequential(classifier_stack)

# Sparse categorical cross-entropy is used with a softmax output, so the
# labels are expected as integer class indices.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

model.summary()

# ---------------------------
# CALLBACKS
# ---------------------------
# Resume from the checkpoint written after epoch 10. This replaces the model
# object built earlier in the script.
model = tf.keras.models.load_model('/content/drive/MyDrive/pest_detection_epoch_10.keras')

# The loaded model carries its own copy of the backbone, so `base_model` must
# be rebound to it. Otherwise later code that unfreezes `base_model` would act
# on the discarded network and leave the trained model untouched.
# NOTE(review): assumes the checkpoint was saved from the Sequential model
# defined in this script, where the backbone is the first layer - confirm.
base_model = model.layers[0]

# Save the full model after every epoch (not only the best one), so training
# can be resumed from any epoch.
checkpoint = ModelCheckpoint(
    '/content/drive/MyDrive/pest_detection_epoch_{epoch:02d}.keras',
    save_best_only=False,        # <—— IMPORTANT
    save_weights_only=False,
    verbose=1
)

# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Halve the learning rate after 3 epochs without validation-loss improvement,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6
)

# Restart from the initial learning rate regardless of the value stored in the
# checkpoint's optimizer state.
model.optimizer.learning_rate.assign(1e-4)

# ---------------------------
# TRAIN TOP LAYERS
# ---------------------------
top_layer_callbacks = [checkpoint, early_stop, reduce_lr]

# Training resumes at epoch index 10, matching the epoch-10 checkpoint loaded
# earlier. With `initial_epoch` set, `epochs` is the index of the final epoch,
# not the number of additional epochs to run.
history = model.fit(
    x=train_data_aug,
    validation_data=val_data,
    epochs=50,
    initial_epoch=10,
    class_weight=class_weights_dict,
    callbacks=top_layer_callbacks,
)


# ---------------------------
# FINE-TUNE (UNFREEZE LAST LAYERS)
# ---------------------------
# Unfreeze the backbone that is actually inside `model`. After the model has
# been restored with `load_model`, the earlier `base_model` variable may refer
# to a different, discarded network, so the backbone is taken from the model.
# NOTE(review): assumes the backbone is the first layer of the Sequential
# model, as in the model definition in this script - confirm.
backbone = model.layers[0]
backbone.trainable = True
for layer in backbone.layers[:-20]:
    layer.trainable = False
# Keep BatchNormalization layers frozen inside the unfrozen tail: the Keras
# fine-tuning guide recommends leaving them in inference mode so their moving
# statistics are not overwritten by the small-learning-rate updates.
for layer in backbone.layers[-20:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Changing `trainable` only takes effect after recompiling; a 10x smaller
# learning rate limits how far the pretrained weights can drift.
model.compile(
    optimizer=Adam(1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Epoch numbering restarts at 1 in this phase, so a dedicated filename keeps
# these checkpoints from overwriting the top-layer-training ones (including
# the epoch-10 file this script resumes from).
finetune_checkpoint = ModelCheckpoint(
    '/content/drive/MyDrive/pest_detection_finetune_epoch_{epoch:02d}.keras',
    save_best_only=False,
    save_weights_only=False,
    verbose=1
)

history_fine = model.fit(
    train_data_aug,
    validation_data=val_data,
    epochs=50,
    class_weight=class_weights_dict,
    callbacks=[finetune_checkpoint, early_stop, reduce_lr]
)

# ---------------------------
# EVALUATE
# ---------------------------
# The model is compiled with a single metric (accuracy), so evaluate() yields
# the loss followed by that metric.
test_metrics = model.evaluate(test_data)
loss, acc = test_metrics
print(f"✅ Test Accuracy: {acc:.4f}")

# ---------------------------
# CLASSIFICATION REPORT
# ---------------------------
# Collect labels and predictions in a single pass over the test set. If the
# dataset reshuffles between iterations (the loader's default), gathering
# labels in one pass and predictions in another pairs every prediction with
# the label of a different image and makes the report meaningless.
true_batches, pred_batches = [], []
for batch_images, batch_labels in test_data:
    true_batches.append(batch_labels.numpy())
    pred_batches.append(np.argmax(model.predict_on_batch(batch_images), axis=1))

y_true = np.concatenate(true_batches, axis=0)
y_pred = np.concatenate(pred_batches, axis=0)

# `labels` pins the report to every class index, so `target_names` always has
# the right length even when a class is absent from the test split;
# zero_division=0 reports 0 for classes that were never predicted.
print(classification_report(
    y_true, y_pred,
    labels=list(range(num_classes)),
    target_names=[f"Class {i}" for i in range(num_classes)],
    zero_division=0
))

# ---------------------------
# SAVE FINAL MODEL
# ---------------------------
# Write the trained model to Drive as a single .keras file.
FINAL_MODEL_PATH = "/content/drive/MyDrive/pest_identifier_model.keras"
model.save(FINAL_MODEL_PATH)
print("✅ Saved as pest_identifier_model.keras")


Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow)
  Downloading wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)
  Downloading tensorboard_data_server-0.



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 58285 files belonging to 102 classes.
Found 7508 files belonging to 102 classes.
Found 22659 files belonging to 102 classes.
Classes: ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016', '017', '018', '019', '020', '021', '022', '023', '024', '025', '026', '027', '028', '029', '030', '031', '032', '033', '034', '035', '036', '037', '038', '039', '040', '041', '042', '043', '044', '045', '046', '047', '048', '049', '050', '051', '052', '053', '054', '055', '056', '057', '058', '059', '060', '061', '062', '063', '064', '065', '066', '067', '068', '069', '070', '071', '072', '073', '074', '075', '076', '077', '078', '079', '080', '081', '082', '083', '084', '085', '086', '087', '088', '089', '090', '091', '092', '093', '094', '095', '096', '097', '098', '099', '100', '101']
Class counts: Co

Epoch 11/50
[1m1822/1822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5253 - loss: 1.7623