In [5]:
## 1. PRE-REQUISITES AND IMPORTS (LOCAL EXECUTION)

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import os
import numpy as np

# --- 1. CONFIGURATION ---
# Spatial size (height, width) every image is resized to; the channel dimension
# is appended where the backbone is built.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
# Width of the final softmax layer. The generators use class_mode='categorical',
# so this has to equal the number of class sub-folders in the training data.
NUM_CLASSES = 6
# Adam learning rate while only the new classification head is trained.
LEARNING_RATE_HEAD = 0.001
# Much smaller Adam learning rate used once backbone layers are unfrozen.
LEARNING_RATE_FINE_TUNE = 0.00001
# Backbone layers with an index below this value stay frozen during fine-tuning.
FINE_TUNE_LAYERS_START_INDEX = 100

# --- 2. LOCAL DATASET PATHS ---
# CRITICAL: These paths assume your dataset is already extracted into the project's ml/dataset folder.
# The default is relative to the current working directory, so it only resolves
# when the script is launched from the Eco_Connect project root. Set the
# ECO_CONNECT_DATASET_DIR environment variable to point at the dataset when
# running from anywhere else (e.g. from a notebook in a sub-folder).
BASE_DIR = os.environ.get('ECO_CONNECT_DATASET_DIR', './ml/dataset/')

# Assuming the folders are directly under 'dataset/'
TRAIN_DIR = os.path.join(BASE_DIR, 'training')
VAL_DIR = os.path.join(BASE_DIR, 'validation')

# Final save locations (relative to where you execute the script, assuming Eco_Connect root)
MODEL_SAVE_PATH = 'backend/model/eco_connect_model.h5'
LABEL_SAVE_PATH = 'ml/labels.txt'

print(f"Base directory for data: {BASE_DIR}")
print(f"Checking Training Directory existence: {os.path.exists(TRAIN_DIR)}")
# A relative BASE_DIR is resolved against the working directory; showing the
# absolute path makes a "False" above immediately diagnosable.
print(f"Resolved training directory: {os.path.abspath(TRAIN_DIR)}")


# --- 3. DATA LOADING AND AUGMENTATION ---
# Check data existence before proceeding. Both splits are verified up front so
# a missing validation folder is reported clearly instead of surfacing later
# from inside flow_from_directory.
if not os.path.exists(TRAIN_DIR):
    raise FileNotFoundError(
        f"Training data not found at: {TRAIN_DIR}. Please extract your Kaggle dataset here."
    )
if not os.path.exists(VAL_DIR):
    raise FileNotFoundError(
        f"Validation data not found at: {VAL_DIR}. Please extract your Kaggle dataset here."
    )

# Training images get MobileNetV2 preprocessing plus random geometric
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    rotation_range=20, width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)

# Validation images are only preprocessed, never augmented.
val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

# Load data from directories
print(f"Loading training data from: {TRAIN_DIR}")
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE, class_mode='categorical'
)

print(f"Loading validation data from: {VAL_DIR}")
validation_generator = val_datagen.flow_from_directory(
    VAL_DIR, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE, class_mode='categorical'
)

# The classification head is built with NUM_CLASSES outputs, so a dataset with
# a different number of class folders would only fail later, inside fit(),
# with an opaque shape error. Report the mismatch here instead.
if train_generator.num_classes != NUM_CLASSES:
    raise ValueError(
        f"NUM_CLASSES is {NUM_CLASSES} but {train_generator.num_classes} class "
        f"folders were found in {TRAIN_DIR}: {sorted(train_generator.class_indices)}"
    )

# Validation labels are one-hot encoded from the validation folder names; they
# are only comparable to the training labels if both splits map the same
# class names to the same indices.
if validation_generator.class_indices != train_generator.class_indices:
    raise ValueError(
        "Training and validation class folders differ: "
        f"{sorted(train_generator.class_indices)} vs "
        f"{sorted(validation_generator.class_indices)}"
    )


# --- 4. MODEL ARCHITECTURE (TRANSFER LEARNING) ---
# ImageNet-pretrained MobileNetV2 without its original classifier; the input
# shape is the configured image size with 3 channels appended.
backbone_input_shape = (*IMAGE_SIZE, 3)
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=backbone_input_shape,
)
# Freeze the whole backbone so that, initially, only the new head is trained.
base_model.trainable = False

# Build the New Classification Head:
# pooled backbone features -> 1024-unit ReLU layer -> 50% dropout -> softmax.
x = base_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
):
    x = head_layer(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

# Single functional model running from the backbone input to the new softmax.
model = Model(inputs=base_model.input, outputs=predictions)


# --- 5. INITIAL TRAINING (FEATURE EXTRACTION - Training the Head Only) ---
# The backbone was frozen when the model was built, so this phase only updates
# the newly added head layers.
head_optimizer = Adam(learning_rate=LEARNING_RATE_HEAD)
model.compile(
    loss='categorical_crossentropy',
    optimizer=head_optimizer,
    metrics=['accuracy'],
)

print("\n--- Phase 1: Initial Training (Head Layers Only) ---")
# WARNING: Running this locally will be very slow without a GPU.
initial_epochs = 5  # Using 5 epochs (recommended baseline)
history_initial = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=initial_epochs,
)


# --- 6. FINE-TUNING (SPECIALIZATION) ---
# Unfreeze the backbone, then re-freeze everything below the configured index
# so only the top backbone layers (plus the head) are updated.
base_model.trainable = True
for layer in base_model.layers[:FINE_TUNE_LAYERS_START_INDEX]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even in the unfrozen range. A frozen
# BatchNormalization layer runs in inference mode, so its moving mean/variance
# are not re-estimated from the small fine-tuning batches, which would
# otherwise disturb the pretrained features the head was just trained on.
for layer in base_model.layers[FINE_TUNE_LAYERS_START_INDEX:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Changes to `trainable` only take effect after recompiling.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE_FINE_TUNE),
    loss='categorical_crossentropy', metrics=['accuracy']
)

# `history_initial.epoch` holds the zero-based indices of the epochs already
# run, so its length (not its last element) is the index of the next epoch.
# Using `epoch[-1]` would restart the numbering on an epoch that was already
# trained and would raise IndexError if no epoch had completed.
completed_epochs = len(history_initial.epoch)
fine_tune_epochs = 15  # 15 additional epochs

print("\n--- Phase 2: Fine-Tuning (Top Layers Unfrozen) ---")
# WARNING: Running this locally will be very slow without a GPU.
history_fine_tune = model.fit(
    train_generator,
    epochs=completed_epochs + fine_tune_epochs,
    initial_epoch=completed_epochs,
    validation_data=validation_generator
)

# --- 7. SAVING AND EXPORTING ---
# Create the target folder if needed. `exist_ok=True` avoids the
# check-then-create race, and the emptiness guard covers a save path with no
# directory component (os.makedirs('') raises FileNotFoundError).
model_save_dir = os.path.dirname(MODEL_SAVE_PATH)
if model_save_dir:
    os.makedirs(model_save_dir, exist_ok=True)

print(f"\nSaving final model to: {MODEL_SAVE_PATH}")
model.save(MODEL_SAVE_PATH)

# Order the labels explicitly by their class index so that line N of the label
# file always corresponds to output unit N of the model, independent of the
# iteration order of the `class_indices` mapping.
class_indices = train_generator.class_indices
class_labels = sorted(class_indices, key=class_indices.get)

label_save_dir = os.path.dirname(LABEL_SAVE_PATH)
if label_save_dir:
    os.makedirs(label_save_dir, exist_ok=True)

# Explicit encoding keeps the file identical across platforms even when class
# folder names contain non-ASCII characters.
with open(LABEL_SAVE_PATH, 'w', encoding='utf-8') as f:
    f.writelines(f"{label}\n" for label in class_labels)

print("\nTraining completed successfully! Model and labels saved.")

Base directory for data: ./ml/dataset/
Checking Training Directory existence: False


FileNotFoundError: Training data not found at: ./ml/dataset/training. Please extract your Kaggle dataset here.