In [1]:
import os
import json
import yaml
import pickle
import random
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# ----------------------------
# CONFIG (Windows raw strings)
# ----------------------------
DATA_DIRS = [
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\plastic",
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\trash",
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\paper",
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\metal",
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\glass",
    r"C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized\cardboard",
]
# Folder holding the class folders; derived from the first entry only
# (...\archive\dataset-resized).
DATA_ROOT = str(Path(DATA_DIRS[0]).parent)

# Every artifact produced by the run is written directly into this folder.
OUTPUT_DIR = r"C:\Users\sagni\Downloads\Plastic Detector"


def _artifact(filename):
    """Return the path of *filename* inside OUTPUT_DIR as a plain string."""
    return str(Path(OUTPUT_DIR) / filename)


MODEL_H5 = _artifact("model.h5")
CLASS_PKL = _artifact("class_indices.pkl")
RUN_YAML = _artifact("run_config.yaml")
METRICS_JSON = _artifact("metrics.json")
VAL_PRED_JSON = _artifact("val_predictions.json")

# Training hyperparameters
IMG_SIZE = (224, 224)   # (height, width) passed as target_size / model input
BATCH_SIZE = 32
EPOCHS = 12
VAL_SPLIT = 0.2         # fraction of images held out for validation
SEED = 42
LEARNING_RATE = 1e-3
AUGMENT = True          # set to False for a pure baseline without augmentation

# ----------------------------
# Reproducibility
# ----------------------------
def set_seed(seed=SEED):
    """Seed Python's ``random``, NumPy and TensorFlow with the same value.

    Args:
        seed: Value handed to each library's seeding function; defaults to
            the module-level ``SEED``.
    """
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)

set_seed(SEED)

# ----------------------------
# Prepare output directory
# ----------------------------
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# ----------------------------
# Verify folders exist
# ----------------------------
# Class names are the folder names, sorted so the label order is stable.
expected_classes = sorted(Path(p).name for p in DATA_DIRS)

# is_dir() rather than exists(): a regular file sitting at one of these paths
# must be rejected here instead of failing later inside the data generators.
if not Path(DATA_ROOT).is_dir():
    raise FileNotFoundError(f"DATA_ROOT not found: {DATA_ROOT}")
for p in DATA_DIRS:
    if not Path(p).is_dir():
        raise FileNotFoundError(f"Class folder missing: {p}")
    # DATA_ROOT is derived from the first entry only, and it is the only path
    # given to flow_from_directory. A class folder with a different parent
    # would therefore not be the folder that is actually read.
    if Path(p).parent != Path(DATA_ROOT):
        raise ValueError(f"Class folder is not directly inside DATA_ROOT: {p}")

print("[INFO] Data root:", DATA_ROOT)
print("[INFO] Classes:", expected_classes)

# ----------------------------
# Data generators
# ----------------------------
# Options common to both generators: the EfficientNet preprocessing function
# and the fraction of images reserved for the validation subset.
_base_datagen_kwargs = {
    "preprocessing_function": preprocess_input,
    "validation_split": VAL_SPLIT,
}

# Random augmentations; passed to the training generator only when AUGMENT is on.
_augment_kwargs = {
    "rotation_range": 15,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.1,
    "horizontal_flip": True,
    "fill_mode": "nearest",
} if AUGMENT else {}

train_datagen = ImageDataGenerator(**_base_datagen_kwargs, **_augment_kwargs)

# The validation generator never augments.
val_datagen = ImageDataGenerator(**_base_datagen_kwargs)

# flow_from_directory requires the structure:
# DATA_ROOT/
#   cardboard/
#   glass/
#   metal/
#   paper/
#   plastic/
#   trash/
# which matches your paths inside dataset-resized
# Settings identical for the training and validation iterators. Passing
# `classes` explicitly pins the label order to expected_classes.
_flow_kwargs = {
    "target_size": IMG_SIZE,
    "batch_size": BATCH_SIZE,
    "classes": expected_classes,
    "class_mode": "categorical",
    "seed": SEED,
}

# Training subset: shuffled.
train_gen = train_datagen.flow_from_directory(
    DATA_ROOT, shuffle=True, subset="training", **_flow_kwargs
)

# Validation subset: fixed order, so predictions can be zipped with filenames.
val_gen = val_datagen.flow_from_directory(
    DATA_ROOT, shuffle=False, subset="validation", **_flow_kwargs
)

num_classes = len(train_gen.class_indices)
print("[INFO] Class indices:", train_gen.class_indices)

# ----------------------------
# Build model (EfficientNetB0)
# ----------------------------
# Use the first GPU when TensorFlow reports one, otherwise fall back to the CPU.
device_name = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
input_shape = (*IMG_SIZE, 3)

with tf.device(device_name):
    # ImageNet-initialised backbone without its classification top.
    base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=input_shape)
    base.trainable = False  # backbone stays frozen for this training stage

    # Head: backbone features -> global average pool -> dropout -> softmax.
    inputs = layers.Input(shape=input_shape)
    x = base(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = models.Model(inputs, outputs)

    opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=opt,
        metrics=["accuracy"],
    )
    model.summary()

# ----------------------------
# Callbacks
# ----------------------------
# Stop after 3 epochs without a val_accuracy improvement; best weights are restored.
early_stop = EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True)
# Halve the learning rate after 2 epochs without a val_loss improvement (floor 1e-6).
lr_on_plateau = ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, min_lr=1e-6, verbose=1
)
# Write the model to MODEL_H5 each time val_accuracy reaches a new best.
checkpoint = ModelCheckpoint(MODEL_H5, monitor="val_accuracy", save_best_only=True, verbose=1)

callbacks = [early_stop, lr_on_plateau, checkpoint]

# ----------------------------
# Train
# ----------------------------
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1,
)

# Optionally unfreeze and fine-tune last blocks (quick fine-tune)
# Uncomment to fine-tune:
# base.trainable = True
# for layer in base.layers[:-40]:
#     layer.trainable = False
# model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
#               loss="categorical_crossentropy",
#               metrics=["accuracy"])
# history_ft = model.fit(train_gen, validation_data=val_gen, epochs=4, callbacks=callbacks, verbose=1)

# ModelCheckpoint(save_best_only=True) has already written the best
# val_accuracy model to MODEL_H5 during training. Saving the in-memory model to
# the same path unconditionally would replace that checkpoint with whatever
# weights the model holds after fit(). On Keras versions where EarlyStopping
# restores the best weights only when it actually stops training, those are the
# last-epoch weights. So only write the file when no checkpoint exists.
if not Path(MODEL_H5).is_file():
    model.save(MODEL_H5)
print(f"[INFO] Saved model: {MODEL_H5}")

# ----------------------------
# Save class indices (PKL)
# ----------------------------
# Persist the class-name -> index mapping so predictions can be decoded later.
with open(CLASS_PKL, "wb") as f:
    pickle.dump(train_gen.class_indices, f)
print(f"[INFO] Saved class indices: {CLASS_PKL}")

# ----------------------------
# Compute and save metrics JSON
# ----------------------------
hist = history.history

# Last-epoch values. These are NOT necessarily the values of the checkpointed
# model, which is selected on best val_accuracy.
final_train_acc = float(hist["accuracy"][-1])
final_train_loss = float(hist["loss"][-1])
final_val_acc = float(hist["val_accuracy"][-1])
final_val_loss = float(hist["val_loss"][-1])

# Epoch with the highest val_accuracy. argmax returns the first maximum, which
# matches ModelCheckpoint(save_best_only=True) saving only on a strict improvement.
best_idx = int(np.argmax(hist["val_accuracy"]))

metrics_payload = {
    "timestamp": datetime.datetime.now().isoformat(),
    "epochs_run": len(hist["loss"]),
    "final": {
        "train_accuracy": final_train_acc,
        "train_loss": final_train_loss,
        "val_accuracy": final_val_acc,
        "val_loss": final_val_loss
    },
    # Metrics of the epoch the checkpoint corresponds to (epoch is 1-based).
    "best": {
        "epoch": best_idx + 1,
        "train_accuracy": float(hist["accuracy"][best_idx]),
        "train_loss": float(hist["loss"][best_idx]),
        "val_accuracy": float(hist["val_accuracy"][best_idx]),
        "val_loss": float(hist["val_loss"][best_idx])
    },
    # Cast to plain floats so every recorded series is JSON-serialisable.
    "history": {k: [float(x) for x in v] for k, v in hist.items()}
}

with open(METRICS_JSON, "w", encoding="utf-8") as f:
    json.dump(metrics_payload, f, indent=2)
print(f"[INFO] Saved metrics: {METRICS_JSON}")

# ----------------------------
# Make & save validation predictions (filenames + top1)
# ----------------------------
# Note: val_gen.filenames holds relative paths; classes mapping in class_indices
# Invert the name -> index mapping so predicted indices can be turned into names.
idx_to_class = {index: label for label, index in train_gen.class_indices.items()}

# Restart the validation iterator before predicting over it.
val_gen.reset()
all_probs = model.predict(val_gen, verbose=1)
top1_idx = np.argmax(all_probs, axis=1)   # most probable class per image
top1_conf = np.max(all_probs, axis=1)     # probability of that class

# One record per validation file; backslashes in the relative path become "/".
val_records = [
    {
        "file": rel_path.replace("\\", "/"),
        "pred_class": idx_to_class[int(pred_i)],
        "confidence": float(conf),
    }
    for rel_path, pred_i, conf in zip(val_gen.filenames, top1_idx, top1_conf)
]

with open(VAL_PRED_JSON, "w", encoding="utf-8") as f:
    json.dump(val_records, f, indent=2)
print(f"[INFO] Saved validation predictions: {VAL_PRED_JSON}")

# ----------------------------
# Save YAML run config
# ----------------------------
# Each section of the run record is built on its own, then assembled in the
# order it should appear in the YAML file (sort_keys=False keeps that order).
_run_section = {
    "timestamp": datetime.datetime.now().isoformat(),
    "seed": SEED,
}

_data_section = {
    "data_root": DATA_ROOT,
    "class_dirs": DATA_DIRS,
    "classes": expected_classes,
    "val_split": VAL_SPLIT,
    "image_size": list(IMG_SIZE),  # tuple -> list so safe_dump can serialise it
    "batch_size": BATCH_SIZE,
    "augment": AUGMENT,
}

_model_section = {
    "architecture": "EfficientNetB0",
    "transfer_learning": True,
    "frozen_base": True,
    "optimizer": "Adam",
    "learning_rate": LEARNING_RATE,
    "epochs": EPOCHS,
    "num_classes": num_classes,
}

_artifacts_section = {
    "model_h5": MODEL_H5,
    "class_indices_pkl": CLASS_PKL,
    "metrics_json": METRICS_JSON,
    "val_predictions_json": VAL_PRED_JSON,
}

run_cfg = {
    "run": _run_section,
    "data": _data_section,
    "model": _model_section,
    "artifacts": _artifacts_section,
}

with open(RUN_YAML, "w", encoding="utf-8") as f:
    yaml.safe_dump(run_cfg, f, sort_keys=False, allow_unicode=True)
print(f"[INFO] Saved run config: {RUN_YAML}")

print("\n[DONE] All artifacts saved to:", OUTPUT_DIR)


[INFO] Data root: C:\Users\sagni\Downloads\Plastic Detector\archive\dataset-resized
[INFO] Classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
Found 2024 images belonging to 6 classes.
Found 503 images belonging to 6 classes.
[INFO] Class indices: {'cardboard': 0, 'glass': 1, 'metal': 2, 'paper': 3, 'plastic': 4, 'trash': 5}


  self._warn_if_super_not_called()


Epoch 1/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 599ms/step - accuracy: 0.5149 - loss: 1.3329
Epoch 1: val_accuracy improved from -inf to 0.75149, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 772ms/step - accuracy: 0.5171 - loss: 1.3281 - val_accuracy: 0.7515 - val_loss: 0.7417 - learning_rate: 0.0010
Epoch 2/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 574ms/step - accuracy: 0.8090 - loss: 0.5856
Epoch 2: val_accuracy improved from 0.75149 to 0.79523, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 665ms/step - accuracy: 0.8091 - loss: 0.5853 - val_accuracy: 0.7952 - val_loss: 0.5951 - learning_rate: 0.0010
Epoch 3/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 566ms/step - accuracy: 0.8416 - loss: 0.4920
Epoch 3: val_accuracy improved from 0.79523 to 0.80318, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 659ms/step - accuracy: 0.8417 - loss: 0.4915 - val_accuracy: 0.8032 - val_loss: 0.5392 - learning_rate: 0.0010
Epoch 4/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585ms/step - accuracy: 0.8663 - loss: 0.3918
Epoch 4: val_accuracy improved from 0.80318 to 0.81909, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 679ms/step - accuracy: 0.8663 - loss: 0.3918 - val_accuracy: 0.8191 - val_loss: 0.5196 - learning_rate: 0.0010
Epoch 5/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562ms/step - accuracy: 0.8781 - loss: 0.3590
Epoch 5: val_accuracy improved from 0.81909 to 0.82306, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 651ms/step - accuracy: 0.8781 - loss: 0.3590 - val_accuracy: 0.8231 - val_loss: 0.4912 - learning_rate: 0.0010
Epoch 6/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569ms/step - accuracy: 0.8807 - loss: 0.3371
Epoch 6: val_accuracy improved from 0.82306 to 0.83499, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 663ms/step - accuracy: 0.8807 - loss: 0.3372 - val_accuracy: 0.8350 - val_loss: 0.4766 - learning_rate: 0.0010
Epoch 7/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571ms/step - accuracy: 0.9061 - loss: 0.2780
Epoch 7: val_accuracy did not improve from 0.83499
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 655ms/step - accuracy: 0.9059 - loss: 0.2785 - val_accuracy: 0.8330 - val_loss: 0.4677 - learning_rate: 0.0010
Epoch 8/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568ms/step - accuracy: 0.9108 - loss: 0.2917
Epoch 8: val_accuracy did not improve from 0.83499
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 660ms/step - accuracy: 0.9108 - loss: 0.2917 - val_accuracy: 0.8270 - val_loss: 0.4720 - learning_rate: 0.0010
Epoch 9/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568ms/step - accuracy: 0.9094 - loss: 0.2835
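Epoch 9: val_accuracy improved from 0.83499 to 0.84095, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5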



[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 663ms/step - accuracy: 0.9094 - loss: 0.2834 - val_accuracy: 0.8410 - val_loss: 0.4447 - learning_rate: 0.0010
Epoch 10/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 564ms/step - accuracy: 0.9117 - loss: 0.2687
Epoch 10: val_accuracy improved from 0.84095 to 0.86084, saving model to C:\Users\sagni\Downloads\Plastic Detector\model.h5




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 654ms/step - accuracy: 0.9118 - loss: 0.2684 - val_accuracy: 0.8608 - val_loss: 0.4439 - learning_rate: 0.0010
Epoch 11/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9145 - loss: 0.2503
Epoch 11: val_accuracy did not improve from 0.86084
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.9146 - loss: 0.2503 - val_accuracy: 0.8569 - val_loss: 0.4304 - learning_rate: 0.0010
Epoch 12/12
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590ms/step - accuracy: 0.9209 - loss: 0.2471
Epoch 12: val_accuracy did not improve from 0.86084
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 675ms/step - accuracy: 0.9210 - loss: 0.2469 - val_accuracy: 0.8509 - val_loss: 0.4264 - learning_rate: 0.0010




[INFO] Saved model: C:\Users\sagni\Downloads\Plastic Detector\model.h5
[INFO] Saved class indices: C:\Users\sagni\Downloads\Plastic Detector\class_indices.pkl
[INFO] Saved metrics: C:\Users\sagni\Downloads\Plastic Detector\metrics.json
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 643ms/step
[INFO] Saved validation predictions: C:\Users\sagni\Downloads\Plastic Detector\val_predictions.json
[INFO] Saved run config: C:\Users\sagni\Downloads\Plastic Detector\run_config.yaml

[DONE] All artifacts saved to: C:\Users\sagni\Downloads\Plastic Detector
